In [1]:
from pymongo import MongoClient
import json
import glob
import os
import re
import datetime


In [2]:
def import_weather_data(file_path, schema, collection, city):
    """Load one city's weather records from a JSON file into a collection.

    The ``tavg``, ``tmax`` and ``tmin`` value of every record is converted to
    ``float``; a missing key, ``None`` or an empty string is stored as
    ``None``. Each record is tagged with ``city`` and the whole list is then
    inserted in a single ``insert_many`` call.

    Args:
        file_path: Path of a UTF-8 JSON file containing a list of record dicts.
        schema: Accepted for signature compatibility; not used by this function.
        collection: Object exposing ``insert_many(documents)`` (for example a
            pymongo collection).
        city: Value stored under the ``'city'`` key of every record.

    Raises:
        ValueError: If a temperature value is present but cannot be parsed as
            a float. Nothing is inserted in that case.
    """
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)

    for item in data:
        # Ensure temperature values are floats. Test for "no value"
        # explicitly: a plain truthiness check would turn a legitimate
        # reading of 0 / 0.0 degrees into None.
        for key in ('tavg', 'tmax', 'tmin'):
            value = item.get(key)
            item[key] = None if value is None or value == '' else float(value)

        item['city'] = city

    # insert_many refuses an empty document list, so skip the call for an
    # empty file.
    if data:
        collection.insert_many(data)


In [3]:
# Function to import data from a JSON file
def import_futures_data(file_path, schema, collection, label):
    """Load futures records from a JSON file and insert them in yearly batches.

    Records are read in file order. Each run of consecutive records whose
    ``'Date'`` falls in the same calendar year is passed to
    ``process_futures_data`` as one batch, together with ``schema``,
    ``collection`` and ``label``. An empty file results in no call at all.

    Args:
        file_path: Path of a UTF-8 JSON file containing a list of record dicts,
            each with a ``'Date'`` string formatted as ``YYYY-MM-DD``.
        schema: Forwarded unchanged to ``process_futures_data``.
        collection: Forwarded unchanged to ``process_futures_data``.
        label: Forwarded unchanged to ``process_futures_data``.

    Raises:
        KeyError: If a record has no ``'Date'`` key.
        ValueError: If a ``'Date'`` value does not match ``%Y-%m-%d``.
    """
    # Read everything first so the file is closed before any database work.
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)

    batch = []
    batch_year = None
    for item in data:
        year = datetime.datetime.strptime(item['Date'], "%Y-%m-%d").year
        if batch and year != batch_year:
            # The year changed: flush the batch collected for the previous year
            process_futures_data(batch, schema, collection, label)
            batch = []
        batch_year = year
        batch.append(item)

    # Flush the final batch; skipped for an empty file so that an empty list
    # is never sent on for insertion.
    if batch:
        process_futures_data(batch, schema, collection, label)

def process_futures_data(futures_data, schema, collection, label):
    """Tag every record with ``label`` and insert the batch into ``collection``.

    Args:
        futures_data: List of record dicts; each gets a ``'Label'`` key set
            in place.
        schema: Accepted for signature compatibility; not used by this function.
        collection: Object exposing ``insert_many(documents)`` (for example a
            pymongo collection).
        label: Value stored under ``'Label'`` on every record.
    """
    # pymongo's insert_many rejects an empty document list, so an empty batch
    # is a no-op rather than an error.
    if not futures_data:
        return

    # Insert the labeled data into the collection
    for item in futures_data:
        item['Label'] = label
    collection.insert_many(futures_data)

In [4]:
# Connect to the local MongoDB server, then take handles to the project
# database and the two collections the import cells write to
client = MongoClient('mongodb://localhost:27017/')
db = client.get_database('project_3_db')
weather_collection = db.get_collection('weather_data')
futures_collection = db.get_collection('futures_data')


In [5]:
# Weather schema: maps each field name to a type-name string
# (one date field plus three float temperature fields)
weather_schema = {
    'date': 'date',
    **dict.fromkeys(('tavg', 'tmin', 'tmax'), 'float'),
}

# Futures schema: maps each field name to a type-name string
# (price fields and ATR are doubles, volume is an int)
futures_schema = {
    'Date': 'date',
    **dict.fromkeys(('Open', 'High', 'Low', 'Close', 'Adj_Close'), 'double'),
    'Volume': 'int',
    'ATR': 'double',
}

In [6]:
# Data folders are resolved against the directory the notebook is run from
base_dir = os.getcwd()

# <base_dir>/data/weather and <base_dir>/data/futures
weather_directory, futures_directory = (
    os.path.join(base_dir, 'data', subfolder)
    for subfolder in ('weather', 'futures')
)

In [7]:
# Load each city's "<city>_data.json" file from the weather directory
cities = [
    'Los Angeles',
    'New York City',
    'Chicago',
    'Detroit',
    'Columbus',
    'Philadelphia',
    'Newark',
    'Houston',
    'Indianapolis',
    'Milwaukee',
]

collection = db['weather_data']  # every city is written to this one collection
for city in cities:
    city_file_path = os.path.join(weather_directory, f'{city}_data.json')
    import_weather_data(city_file_path, weather_schema, collection, city)

In [8]:
# Import futures data from multiple files.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# file -> "Winter N" mapping reproducible from run to run.
futures_files = sorted(os.listdir(futures_directory))

# Number only the JSON files: counting other directory entries (hidden files,
# checkpoint folders, ...) would leave gaps in the Winter labels.
json_files = [name for name in futures_files if name.endswith('.json')]

for index, file in enumerate(json_files):
    label = f'Winter {index + 1}'
    file_path = os.path.join(futures_directory, file)
    import_futures_data(file_path, futures_schema, futures_collection, label)

In [9]:
# All imports are done: close the MongoDB client created in the connection
# cell above
client.close()