In [1]:
import requests
from api_keys import rapid_api_key
import json
import os

In [2]:
def extract_temperature_data(weather_data):
    """Return de-duplicated winter (December/January/February) temperature records.

    Args:
        weather_data: Mapping with a ``'data'`` list of daily records. Each
            record must contain a ``'date'`` string whose second
            ``'-'``-separated field is the month number, and may contain
            ``'tavg'``, ``'tmin'`` and ``'tmax'`` (absent or ``None`` means
            the value was not reported).

    Returns:
        A list of dicts with keys ``'date'``, ``'tavg'``, ``'tmin'`` and
        ``'tmax'``, in input order. Only the first usable record per date is
        kept. Records with no temperature values at all are dropped. Missing
        values are filled from the ones that are present: ``tavg`` becomes the
        midpoint of ``tmin``/``tmax`` (or whichever of the two exists), and a
        missing ``tmin``/``tmax`` is set to ``tavg``.

    Raises:
        KeyError: If ``weather_data`` has no ``'data'`` key or a record has
            no ``'date'`` key.
    """
    winter_months = (12, 1, 2)
    temperature_data = []
    processed_entries = set()  # Dates that have already produced an output record

    for data_point in weather_data['data']:
        date = data_point['date']

        # Only the first usable record for a given date is kept
        if date in processed_entries:
            continue

        tavg = data_point.get('tavg')
        tmin = data_point.get('tmin')
        tmax = data_point.get('tmax')

        # Nothing to work with if no temperature value was reported
        if tavg is None and tmin is None and tmax is None:
            continue

        if tavg is None:
            if tmin is not None and tmax is not None:
                tavg = (tmin + tmax) / 2
            else:
                # Test against None explicitly: a reading of 0 degrees is a
                # valid value and must not be treated as "missing".
                tavg = tmin if tmin is not None else tmax

        # At this point tavg is always set, so it can stand in for a missing bound
        if tmin is None:
            tmin = tavg
        if tmax is None:
            tmax = tavg

        month = int(date.split('-')[1])

        if month in winter_months:
            temperature_data.append({
                'date': date,
                'tavg': tavg,
                'tmin': tmin,
                'tmax': tmax
            })
            processed_entries.add(date)

    return temperature_data


In [3]:
# Make sure to set up your api_keys file (it must define rapid_api_key) before running this function.
def get_weather_data(lat, lon, start="2011-01-01", end="2020-02-29",
                     units="imperial", timeout=30):
    """Download daily weather records for a point from the Meteostat RapidAPI endpoint.

    Args:
        lat: Latitude of the requested location (sent as a string).
        lon: Longitude of the requested location (sent as a string).
        start: First date of the requested range, ``YYYY-MM-DD``.
        end: Last date of the requested range, ``YYYY-MM-DD``.
        units: Unit system passed through to the API.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status (for example a bad API key or an exceeded quota).
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    url = "https://meteostat.p.rapidapi.com/point/daily"

    querystring = {
        "lat": str(lat),
        "lon": str(lon),
        "start": start,
        "end": end,
        "units": units
    }

    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
    }

    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)

    # Fail loudly on an error status instead of handing an error payload to
    # downstream code as if it were weather data.
    response.raise_for_status()

    return response.json()

In [4]:
# Functions for conversion and export
def export_data_to_json(data, filename):
    """Serialize ``data`` as JSON into ``filename``, replacing any existing file.

    Args:
        data: Object that ``json.dump`` can serialize.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, mode='w') as sink:
        json.dump(obj=data, fp=sink)

def export_datasets(datasets, data_folder='data', subfolder='weather'):
    """Write every dataset to its own JSON file under ``data_folder/subfolder``.

    Args:
        datasets: Mapping of dataset name to JSON-serializable data. Each
            entry is written to ``<name>_data.json``.
        data_folder: Top-level output directory.
        subfolder: Directory inside ``data_folder`` that receives the files.
    """
    weather_folder = os.path.join(data_folder, subfolder)

    # makedirs creates the intermediate 'data' folder as well, and exist_ok
    # replaces the separate exists() checks (no check-then-create race).
    os.makedirs(weather_folder, exist_ok=True)

    for dataset_name, dataset in datasets.items():
        filename = os.path.join(weather_folder, f"{dataset_name}_data.json")
        export_data_to_json(dataset, filename)


In [5]:
# Get the raw daily weather data for each city.
# Arguments are (latitude, longitude) in whole degrees. Every city below is in
# the western hemisphere, so its longitude must be negative.
la = get_weather_data(34, -118)
chi = get_weather_data(41, -87)
detroit = get_weather_data(42, -83)
nyc = get_weather_data(40, -73)
columbus = get_weather_data(40, -83)
philly = get_weather_data(39, -75)
newark = get_weather_data(40, -74)
houston = get_weather_data(29, -95)
indianapolis = get_weather_data(39, -86)
milwaukee = get_weather_data(43, -87)

In [6]:
# Reduce every raw API response to its winter temperature records.
# The targets on the left line up one-to-one with the responses on the right.
(
    la_data,
    nyc_data,
    chi_data,
    detroit_data,
    columbus_oh_data,
    philly_data,
    newark_data,
    houston_data,
    indianapolis_data,
    milwaukee_data,
) = map(
    extract_temperature_data,
    (
        la,
        nyc,
        chi,
        detroit,
        columbus,
        philly,
        newark,
        houston,
        indianapolis,
        milwaukee,
    ),
)

Data point: {'date': '2011-01-01', 'tavg': 26.8, 'tmin': 19.6, 'tmax': 35.4, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2011-01-02', 'tavg': 27.7, 'tmin': 21.0, 'tmax': 31.8, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2011-01-03', 'tavg': 29.1, 'tmin': 21.9, 'tmax': 34.2, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2011-01-04', 'tavg': 29.1, 'tmin': 20.5, 'tmax': 38.7, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2011-01-05', 'tavg': 31.1, 'tmin': 24.8, 'tmax': 37.4, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2011-01-06', 'tavg': 27.1, 'tmin': 20.8, 'tmax': 33.8, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None

Data point: {'date': '2012-08-24', 'tavg': 75.2, 'tmin': 66.9, 'tmax': 84.9, 'prcp': 0.0, 'snow': 0.0, 'wdir': None, 'wspd': 3.0, 'wpgt': None, 'pres': 1021.0, 'tsun': None}
Data point: {'date': '2012-08-25', 'tavg': 74.3, 'tmin': 69.1, 'tmax': 82.0, 'prcp': 0.0, 'snow': 0.0, 'wdir': None, 'wspd': 6.6, 'wpgt': None, 'pres': 1024.2, 'tsun': None}
Data point: {'date': '2012-08-26', 'tavg': 71.6, 'tmin': 66.0, 'tmax': 78.1, 'prcp': 0.0, 'snow': 0.0, 'wdir': None, 'wspd': 6.3, 'wpgt': None, 'pres': 1025.3, 'tsun': None}
Data point: {'date': '2012-08-27', 'tavg': 72.9, 'tmin': 66.9, 'tmax': 81.0, 'prcp': 0.772, 'snow': 0.0, 'wdir': None, 'wspd': 6.2, 'wpgt': None, 'pres': 1020.2, 'tsun': None}
Data point: {'date': '2012-08-28', 'tavg': 78.8, 'tmin': 73.0, 'tmax': 87.1, 'prcp': 0.13, 'snow': 0.0, 'wdir': 280.0, 'wspd': 8.9, 'wpgt': None, 'pres': 1011.0, 'tsun': None}
Data point: {'date': '2012-08-29', 'tavg': 71.1, 'tmin': 64.9, 'tmax': 75.9, 'prcp': 0.0, 'snow': 0.0, 'wdir': None, 'wspd': 7

Data point: {'date': '2013-02-05', 'tavg': 33.1, 'tmin': 15.1, 'tmax': 51.3, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2013-02-06', 'tavg': 35.8, 'tmin': 23.7, 'tmax': 45.1, 'prcp': 0.02, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2013-02-07', 'tavg': 31.5, 'tmin': 21.7, 'tmax': 43.9, 'prcp': 0.051, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2013-02-08', 'tavg': 26.1, 'tmin': 24.4, 'tmax': 39.6, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2013-02-09', 'tavg': 29.5, 'tmin': 16.2, 'tmax': 44.8, 'prcp': 0.0, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2013-02-10', 'tavg': 28.0, 'tmin': None, 'tmax': 39.7, 'prcp': None, 'snow': None, 'wdir': None, 'wspd':

Data point: {'date': '2014-11-20', 'tavg': 33.6, 'tmin': 24.1, 'tmax': None, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2014-11-21', 'tavg': 31.5, 'tmin': 23.5, 'tmax': 45.3, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2014-11-22', 'tavg': 31.5, 'tmin': 21.7, 'tmax': 45.3, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2014-11-23', 'tavg': 31.1, 'tmin': 19.6, 'tmax': 43.5, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2014-11-24', 'tavg': 30.4, 'tmin': 20.3, 'tmax': 41.5, 'prcp': None, 'snow': None, 'wdir': None, 'wspd': None, 'wpgt': None, 'pres': None, 'tsun': None}
Data point: {'date': '2014-11-25', 'tavg': 34.3, 'tmin': 23.5, 'tmax': 45.7, 'prcp': None, 'snow': None, 'wdir': None, 'wspd'

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [7]:
# Map each city's display name to its extracted records; the name becomes
# part of the exported file name.
datasets = dict([
    ('Los Angeles', la_data),
    ('New York City', nyc_data),
    ('Chicago', chi_data),
    ('Detroit', detroit_data),
    ('Columbus', columbus_oh_data),
    ('Philadelphia', philly_data),
    ('Newark', newark_data),
    ('Houston', houston_data),
    ('Indianapolis', indianapolis_data),
    ('Milwaukee', milwaukee_data),
])


In [8]:
# Write one "<city>_data.json" file per entry under data/weather/
export_datasets(datasets=datasets)