In [1]:
import requests
from api_keys import rapid_api_key
import json
import os

In [2]:
def extract_temperature_data(weather_data, months=(12, 1, 2)):
    """Collect one temperature record per date for the selected months.

    Missing readings are filled from the ones that are present: a missing
    ``tavg`` becomes the mean of ``tmin`` and ``tmax`` (or whichever of the
    two exists), and a missing ``tmin``/``tmax`` falls back to ``tavg``.
    Records with no temperature reading at all are dropped.

    Args:
        weather_data: Mapping with a ``'data'`` entry holding an iterable of
            per-day dicts. Each dict must have a ``'date'`` string whose
            second ``-``-separated field is the month, and may have
            ``'tavg'``, ``'tmin'`` and ``'tmax'`` values.
        months: Month numbers to keep. Defaults to December, January and
            February.

    Returns:
        A list of ``{'date', 'tavg', 'tmin', 'tmax'}`` dicts in input order,
        containing only the first usable record for each kept date.

    Raises:
        KeyError: If ``weather_data`` has no ``'data'`` entry or a record has
            no ``'date'``.
    """
    temperature_data = []
    seen_dates = set()  # Dates that already produced an output record

    # A null payload is treated as "no records"; a missing 'data' key still
    # raises so that an unexpected response body is not silently ignored.
    for data_point in weather_data['data'] or ():
        date = data_point['date']

        # Only the first emitted record for a date is kept.
        if date in seen_dates:
            continue

        tavg = data_point.get('tavg')
        tmin = data_point.get('tmin')
        tmax = data_point.get('tmax')

        # Nothing to work with if every reading is missing.
        if tavg is None and tmin is None and tmax is None:
            continue

        if tavg is None:
            if tmin is not None and tmax is not None:
                tavg = (tmin + tmax) / 2
            else:
                # Compare against None explicitly: a reading of 0 is a valid
                # temperature and must not be treated as missing.
                tavg = tmin if tmin is not None else tmax

        if tmin is None:
            tmin = tavg

        if tmax is None:
            tmax = tavg

        # Dates look like YYYY-MM-DD; the second field is the month.
        month = int(date.split('-')[1])

        if month in months:
            temperature_data.append({
                'date': date,
                'tavg': tavg,
                'tmin': tmin,
                'tmax': tmax
            })
            # Recorded only after emitting, so a date whose first record was
            # unusable can still be filled by a later record.
            seen_dates.add(date)

    return temperature_data


In [3]:
# API helper that downloads the weather data: pass the latitude/longitude of the requested city.
# Make sure to set up your api_keys file before running this function.
def get_weather_data(lat, lon, start="2010-12-01", end="2020-02-29", timeout=30):
    """Fetch daily weather records for one point from the Meteostat RapidAPI.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees; western-hemisphere locations
            (e.g. US cities) are negative.
        start: First day of the requested range, formatted ``YYYY-MM-DD``.
        end: Last day of the requested range, formatted ``YYYY-MM-DD``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example a bad key or an exceeded quota).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://meteostat.p.rapidapi.com/point/daily"

    querystring = {
        "lat": str(lat),
        "lon": str(lon),
        "start": start,
        "end": end,
        "units": 'imperial'
    }

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)

    # Fail here on an HTTP error instead of handing an error body to callers
    # that expect weather records.
    response.raise_for_status()

    return response.json()

In [4]:
# Functions for conversion and export
def export_data_to_json(data, filename):
    """Serialize ``data`` as JSON into the file at ``filename``.

    The file is opened in text write mode, so an existing file at that path
    is overwritten.
    """
    with open(filename, mode='w') as json_file:
        json.dump(data, fp=json_file)

def export_datasets(datasets, data_folder='data', subfolder='weather'):
    """Write every dataset to ``<data_folder>/<subfolder>/<name>_data.json``.

    Args:
        datasets: Mapping of dataset name to JSON-serializable data. The name
            is used verbatim as the start of the output file name.
        data_folder: Top-level output directory; created if missing.
        subfolder: Directory inside ``data_folder`` that receives the files;
            created if missing.
    """
    weather_folder = os.path.join(data_folder, subfolder)

    # makedirs creates any missing parent as well, and exist_ok=True avoids
    # the check-then-create race of testing os.path.exists first.
    os.makedirs(weather_folder, exist_ok=True)

    for dataset_name, dataset in datasets.items():
        filename = os.path.join(weather_folder, f"{dataset_name}_data.json")
        export_data_to_json(dataset, filename)


In [5]:
# Fetch the raw daily records for each city, passing (latitude, longitude).
# Every city here is in the western hemisphere, so every longitude must be
# negative; a positive value queries a point in Asia instead.
la = get_weather_data(34, -118)
chi = get_weather_data(41, -87)
detroit = get_weather_data(42, -83)
nyc = get_weather_data(40, -73)
columbus = get_weather_data(40, -83)
philly = get_weather_data(39, -75)
newark = get_weather_data(40, -74)
houston = get_weather_data(29, -95)
indianapolis = get_weather_data(39, -86)
# Milwaukee is requested once only; a second call would spend API quota on a
# result that is immediately overwritten.
milwaukee = get_weather_data(43, -87)

In [6]:
# Reduce every raw API response to its December-February temperature records.
# The names on the left line up position-for-position with the raw responses
# on the right.
(
    la_data,
    nyc_data,
    chi_data,
    detroit_data,
    columbus_oh_data,
    philly_data,
    newark_data,
    houston_data,
    indianapolis_data,
    milwaukee_data,
) = [
    extract_temperature_data(raw_response)
    for raw_response in (
        la,
        nyc,
        chi,
        detroit,
        columbus,
        philly,
        newark,
        houston,
        indianapolis,
        milwaukee,
    )
]

In [7]:
# Map each city's display name to its extracted records; the name becomes
# the start of the exported file name.
datasets = dict([
    ('Los Angeles', la_data),
    ('New York City', nyc_data),
    ('Chicago', chi_data),
    ('Detroit', detroit_data),
    ('Columbus', columbus_oh_data),
    ('Philadelphia', philly_data),
    ('Newark', newark_data),
    ('Houston', houston_data),
    ('Indianapolis', indianapolis_data),
    ('Milwaukee', milwaukee_data),
])


In [8]:
# Write one JSON file per city.
export_datasets(datasets=datasets)