In [None]:

# Using Python to load data into a database offers several advantages:

#  Automation: Python provides powerful libraries and tools for data manipulation and processing. By writing Python 
#  scripts or applications, you can automate the process of loading data into a database, reducing manual effort and 
#  potential errors.

#  Flexibility: Python's versatility allows you to work with various data formats, including CSV, JSON, Excel, and more. 
#  You can easily read data from different sources, transform it as needed, and load it into the database.

#  Integration: Python integrates seamlessly with popular database management systems (DBMS) through libraries like 
#  SQLAlchemy, pyodbc, and psycopg2. This allows you to connect to different types of databases (e.g., SQL, NoSQL) and 
#  interact with them programmatically.

#  Data Transformation: Python's extensive ecosystem of libraries (e.g., pandas, NumPy) provides powerful tools for data 
#  manipulation and transformation. You can clean, reshape, and preprocess data before inserting it into the database, 
#  ensuring data quality and consistency.

#  Customization: Python allows you to tailor the data loading process to your specific requirements. You can implement 
#  custom logic, error handling, and data validation as part of the loading process, ensuring that data is loaded 
#  correctly and efficiently.

#  Scalability: Python's scalability makes it suitable for handling large volumes of data. You can optimize the loading 
#  process to handle big data scenarios, parallelize tasks, and optimize performance for faster data ingestion.

#  Cross-Platform Compatibility: Python is platform-independent, meaning your data loading scripts can run on different 
#  operating systems (e.g., Windows, macOS, Linux) without modification. This makes it easy to deploy and manage data 
#  loading processes across different environments.

#  Open Source Ecosystem: Python's open-source nature means you have access to a vast ecosystem of libraries, frameworks, 
#  and community contributions. You can leverage existing solutions and best practices to streamline the data loading 
#  process and accelerate development.
        

In [2]:

# Load data from CSV file on PC, to a dynamically created new table in SQL Server
# pip install pyodbc
import csv
import pyodbc

# Establish connection to SQL Server with Username and Password
# conn_str = (
#     r'DRIVER={ODBC Driver 17 for SQL Server};'
#     r'SERVER=MyDevice\SQLEXPRESS01;'
#     r'DATABASE=my_test;'
#     r'UID=your_username;'
#     r'PWD=your_password;'
# )


# Establish connection to SQL Server using Windows authentication
conn_str = (
    r'DRIVER={ODBC Driver 17 for SQL Server};'
    r'SERVER=MyDevice\SQLEXPRESS01;'
    r'DATABASE=my_test;'
    r'Trusted_Connection=yes;'
)

# CSV file whose header row supplies the column names of the new table
csv_file = r'C:\Users\ryan_\Desktop\Book1.csv'


def quote_identifier(name):
    """Return ``name`` as a bracket-delimited SQL Server identifier.

    Column names come straight from the CSV header, so they may contain
    spaces, reserved words or brackets. Identifiers cannot be bound as query
    parameters, so they are delimited instead; a closing bracket inside the
    name is escaped by doubling it.
    """
    return '[' + name.replace(']', ']]') + ']'


conn = pyodbc.connect(conn_str)
try:
    cursor = conn.cursor()

    # Single pass over the file: header first, then the data rows
    with open(csv_file, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        columns = next(csvreader, None)  # None when the file is empty
        if not columns:
            raise ValueError(f"{csv_file} has no header row")

        # Create SQL CREATE TABLE statement dynamically (every column is text)
        create_table_query = (
            "CREATE TABLE NewTable ("
            + ', '.join(f'{quote_identifier(col)} VARCHAR(MAX)' for col in columns)
            + ")"
        )
        cursor.execute(create_table_query)

        # One placeholder per column; the statement is built once, not per row
        insert_query = f"INSERT INTO NewTable VALUES ({', '.join('?' * len(columns))})"
        for row in csvreader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            cursor.execute(insert_query, row)

    # Commit once so the table and all of its rows land together
    conn.commit()
    cursor.close()
finally:
    # Closing without a commit discards the uncommitted work, so a failed
    # load does not leave a half-filled table behind
    conn.close()
        

In [1]:

# Load data from CSV file on PC, to dataframe, and then push dataframe to a dynamically created new table in SQL Server
# pip install pandas sqlalchemy
import pandas as pd
from sqlalchemy import create_engine

# Read CSV file into DataFrame
df = pd.read_csv(r'C:\\Users\\ryan_\\Book1.csv')

# Create connection to SQL Server
# In this case:
# My Server Name = 'MyDevice\SQLEXPRESS01'
# My Database Name = 'my_test'
# The server name is a raw string: in a normal string literal the "\S" is an
# invalid escape sequence, which newer Python versions warn about.
server = r'MyDevice\SQLEXPRESS01'
database = 'my_test'
engine = create_engine(
    f'mssql+pyodbc://{server}/{database}?driver=ODBC+Driver+17+for+SQL+Server'
)

try:
    # Insert data into SQL Server table; the table is created from the
    # DataFrame columns if it does not exist, otherwise rows are appended
    df.to_sql('MyTable', con=engine, if_exists='append', index=False)
finally:
    # Release the pooled database connections held by the engine
    engine.dispose()


In [4]:

# Load data from JSON file on PC, to a dynamically created new collection in MongoDB
import pymongo
import json
from pymongo import MongoClient, InsertOne

client = pymongo.MongoClient('mongodb://localhost:27017')
try:
    # NOTE(review): this writes into MongoDB's built-in "local" database -
    # confirm that is intended rather than an application database.
    db = client.local
    collection = db.NewTable
    requesting = []

    # The file is consumed one line at a time, each line holding one JSON
    # document that becomes one insert operation
    with open(r"C:\\Users\\ryan_\\odds_todaysGames.json") as f:
        for jsonObj in f:
            if not jsonObj.strip():
                continue  # blank or trailing lines are not documents
            myDict = json.loads(jsonObj)
            requesting.append(InsertOne(myDict))

    # bulk_write rejects an empty list of operations, so skip it when the
    # file held no documents; result is None in that case
    result = collection.bulk_write(requesting) if requesting else None
finally:
    # Close the client even when reading or writing failed
    client.close()


In [3]:

# Load data from JSON file on PC, to dataframe, and then push dataframe to a dynamically created new table in MongoDB
import pandas as pd
from pymongo import MongoClient

# Load data from JSON file into a DataFrame
df = pd.read_json(r"C:\\Users\\ryan_\\odds_todaysGames.json")

# Convert DataFrame to list of dictionaries (one dictionary per row)
data = df.to_dict(orient='records')

# Connect to MongoDB
client = MongoClient('localhost', 27017)
try:
    db = client.local
    collection = db.TableJson

    # Insert documents into MongoDB; insert_many rejects an empty list,
    # so an empty DataFrame results in no write at all
    if data:
        collection.insert_many(data)
finally:
    # Close MongoDB connection, also when the insert failed
    client.close()


In [None]:

# JSON (JavaScript Object Notation) and XML (eXtensible Markup Language) are both popular formats for representing 
#  structured data. Here's a comparison between the two:

#  Syntax:
#  JSON uses a lightweight syntax that resembles JavaScript object notation. It consists of key-value pairs and supports arrays.
#  XML uses tags to define elements, attributes, and hierarchies. It's more verbose compared to JSON.

#  Readability:
#  JSON tends to be more readable and compact, especially for simple data structures.
#  XML can be more verbose and less human-readable due to its tag-based structure.

#  Data Types:
#  JSON supports basic data types such as strings, numbers, booleans, arrays, and objects. It does not have built-in 
#  support for dates or binary data.
#  XML is text-based and can represent complex data structures, but it does not have built-in support for specific data types.

#  Usage:
#  JSON is commonly used for web APIs, configuration files, and data interchange between systems due to its simplicity and 
#  ease of use.
#  XML is widely used in various domains, including web services (SOAP), document storage (e.g., XHTML), and configuration files.

#  Parsing:
#  JSON parsing is usually faster and more efficient compared to XML parsing, especially in JavaScript environments.
#  XML parsing can be more complex and may require more processing overhead, particularly for large documents.

#  Schema Support:
#  JSON does not have built-in schema support, although JSON Schema exists as a separate specification for defining JSON 
#  data structure constraints.
#  XML has built-in support for Document Type Definitions (DTD) and XML Schema Definition (XSD), which allow for stricter 
#  validation of document structure.

#  Tooling:
#  JSON is well-supported by modern programming languages and has extensive tooling available for parsing, validation, 
#  and manipulation.
#  XML has been around longer and has a rich ecosystem of tools and libraries for processing and transforming XML documents.
        

In [None]:

# Based on what we saw above, we can use a similar approach to insert an XML file into MongoDB. However, since XML is 
# more hierarchical in nature compared to JSON, you'll need to parse the XML file and map its structure to a MongoDB 
# document format.

# Parse the XML file and convert it into a Python dictionary, or list of dictionaries, representing MongoDB documents.

# Here's a general approach to insert XML data into MongoDB...
# Use the InsertOne or insert_many method to insert the documents into MongoDB.
    
import xml.etree.ElementTree as ET
import pymongo
from pymongo import MongoClient, InsertOne

# Open a MongoDB client and select the target collection (local.NewTable)
client = pymongo.MongoClient('mongodb://localhost:27017')
db = client['local']
collection = db['NewTable']

# Write operations are collected here and sent later in one bulk call
requesting = list()

# Load the XML document and keep a handle on its root element
tree = ET.parse(r'C:\Users\ryan_\test.xml')
root = tree.getroot()

# Convert XML to MongoDB documents
def xml_to_documents(element):
    """Flatten the descendants of ``element`` into a single dictionary.

    Every ``<change>`` child contributes one entry whose key is the text of
    its ``<change_description>`` and whose value is the text of its
    ``<new_value>``. Every other child contributes its attributes under its
    tag name, and its own descendants are merged into the same dictionary.

    Because the result is flat, siblings sharing a tag (or two changes with
    the same description) overwrite each other; the last one wins.

    Args:
        element: An ``xml.etree.ElementTree.Element`` whose children are read.

    Returns:
        dict: The flattened document. ``<change>`` elements without a
        description are skipped; a missing ``<new_value>`` is stored as None.
    """
    doc = {}
    for child in element:
        if child.tag == 'change':
            description = child.find('change_description')
            new_value = child.find('new_value')
            # Without a description there is no key to store the change
            # under (and a None key cannot be used as a document field)
            if description is None or description.text is None:
                continue
            doc[description.text] = new_value.text if new_value is not None else None
        else:
            # Copy the attributes so the document does not alias the live
            # attribute mapping of the parsed tree
            doc[child.tag] = dict(child.attrib)
            doc.update(xml_to_documents(child))
    return doc

# Build one insert operation per top-level element of the XML document
for child in root:
    doc = xml_to_documents(child)
    requesting.append(InsertOne(doc))

try:
    # bulk_write rejects an empty list of operations, so only call it when
    # the XML actually produced documents
    if requesting:
        result = collection.bulk_write(requesting)
        inserted_count = result.inserted_count
    else:
        inserted_count = 0
finally:
    # Close MongoDB connection, also when the bulk write failed
    client.close()

print(inserted_count)


In [None]:

# Here is how we can read the same XML file into a structured dataframe, and then push the dataframe to a dynamically 
# created new collection in MongoDB

import xml.etree.ElementTree as ET
import pandas as pd
import pymongo
from pymongo import MongoClient

# Step 1: Parse XML file and extract data into a DataFrame
tree = ET.parse(r'C:\Users\ryan_\test.xml')
root = tree.getroot()

# One record per <horse> found under a <start_changes> element
data = []
for horse in root.findall('.//start_changes/horse'):
    horse_data = {
        'horse_name': horse.get('horse_name'),
        'program_number': horse.get('program_number'),
    }
    for change in horse.findall('change'):
        description = change.find('change_description')
        value = change.find('new_value')
        # A change without a description has no column to go into
        if description is None or description.text is None:
            continue
        change_description = description.text
        new_value = value.text if value is not None else None
        horse_data[change_description] = new_value
    data.append(horse_data)

# Each distinct change description becomes a column; horses that lack a
# given change get a missing value in that column
df = pd.DataFrame(data)
print(df.head())  # head must be called - printing df.head shows the bound method

# Step 2: Connect to MongoDB and create a new collection dynamically
client = pymongo.MongoClient('mongodb://localhost:27017')
try:
    db = client.local
    collection_name = 'NewCollection'  # Change this to your desired collection name
    collection = db[collection_name]

    # Step 3: Insert DataFrame records into MongoDB collection
    # NOTE: cells that are missing in the DataFrame are inserted as-is
    # (NaN values), not dropped from the document.
    records = df.to_dict(orient='records')
    if records:
        # Keep the result of *this* insert; insert_many reports the new ids
        # rather than a count, so the count is the number of ids
        result = collection.insert_many(records)
        inserted_count = len(result.inserted_ids)
    else:
        # insert_many rejects an empty list, so nothing is written
        inserted_count = 0
finally:
    # Close MongoDB connection, also when the insert failed
    client.close()

print(inserted_count)


In [None]:

# Data Source for XML:
# urlfile = "http://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm"

# Download XML file using Python

import urllib.request

urlfile = "http://www.equibase.com/premium/eqbLateChangeXMLDownload.cfm"

# A browser-like User-Agent header is sent with the request
request = urllib.request.Request(urlfile, headers={'User-Agent': 'Mozilla'})

# The context manager closes the HTTP response even if reading fails, and
# the timeout keeps an unresponsive server from hanging the notebook
with urllib.request.urlopen(request, timeout=60) as response:
    payload = response.read()

# The file is opened only after the download succeeded, so a failed request
# does not truncate an existing copy
with open("C:\\Users\\ryan_\\test.xml", 'wb') as outfile:
    outfile.write(payload)
    

In [2]:

# SQL Server and MongoDB are both popular database management systems, but they differ in several key aspects. Here are 
# some major similarities and differences between the two:

    

# Similarities:
#  Data Storage: Both SQL Server and MongoDB are used to store and manage data.
#  Indexes: Both support indexing to improve query performance.
#  Security: They offer security features such as user authentication, authorization, and encryption to protect data.
#  Backup and Recovery: Both provide mechanisms for backup and recovery of data to ensure data integrity and availability.
#  Scalability: Both SQL Server and MongoDB support scaling horizontally and vertically to handle increasing data volumes 
#  and user loads.



# Differences:

#  Data Model:
#  SQL Server: Follows a relational data model and stores data in tables with rows and columns. It uses SQL 
#  (Structured Query Language) for querying data using predefined schemas.
#  MongoDB: Follows a document-oriented data model and stores data in flexible, JSON-like documents. It does 
#  not require a predefined schema, allowing for dynamic and nested data structures. Queries are made using 
#  MongoDB's query language and operators.

#  Schema:
#  SQL Server: Requires a predefined schema where the structure of the data (tables, columns, data types, constraints) 
#  must be defined before data insertion.
#  MongoDB: Allows flexible schema design where documents within a collection can have varying structures. It supports 
#  dynamic schema evolution, enabling easy modification of data structures without downtime.

#  Query Language:
#  SQL Server: Uses SQL (Structured Query Language) for querying data. SQL provides a standardized syntax for interacting 
#  with relational databases.
#  MongoDB: Uses a query language similar to JavaScript and provides a rich set of operators for querying nested and 
#  complex data structures within documents.

#  Transactions:
#  SQL Server: Supports ACID (Atomicity, Consistency, Isolation, Durability) transactions for ensuring data integrity 
#  in multi-operation transactions.
#  MongoDB: Supports atomic operations on a single document and, since version 4.0 (replica sets) and 4.2 (sharded 
#  clusters), multi-document ACID transactions across multiple collections/documents.

#  Scaling:
#  SQL Server: Typically scales vertically by adding more resources (CPU, memory, storage) to a single server.
#  MongoDB: Scales horizontally by distributing data across multiple servers (sharding), allowing for linear scalability 
#  as data volume increases.

#  Community and Ecosystem:
#  SQL Server: Developed and maintained by Microsoft, with strong community support and a wide range of third-party tools 
#  and integrations.
#  MongoDB: Developed and maintained by MongoDB Inc., with an active open-source community and a rich ecosystem of 
#  libraries, frameworks, and tools.

#  Use Cases:
#  SQL Server: Well-suited for applications with structured and relational data, such as transactional systems, enterprise 
#  applications, and data warehouses.
#  MongoDB: Ideal for applications with unstructured or semi-structured data, real-time analytics, content management systems, 
#  and use cases requiring flexibility and scalability.

# Overall, the choice between SQL Server and MongoDB depends on factors such as the nature of your data, the complexity of 
#  your application, scalability requirements, and development preferences.
