In [102]:
## I'm importing the packages that I'll be using throughout this assignment
import pandas as pd
import os
import json
import requests
import numpy as np
import pandas as pd

from lets_plot import *
LetsPlot.setup_html()


In [103]:
## Setting the working directory as data, as I want to create the dataframes I will be using throughout this assignment
!cd /files/ds105a-2024-w06-summative-deyavuz/data

4874.56s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


## Build the Open-Meteo historical-forecast URL for London and print it to check it for mistakes.
## London is at (51.50853, -0.12574); the previous values 52.52 / 13.41 are Berlin's coordinates.
london_url = (
    "https://historical-forecast-api.open-meteo.com/v1/forecast"
    "?latitude=51.50853&longitude=-0.12574"
    "&start_date=2021-03-01&end_date=2024-10-29"
    "&hourly=rain,showers&timezone=Europe%2FLondon"
)

print(london_url)

## The timeout stops a stalled connection from hanging the notebook
response = requests.get(london_url, timeout=30)
## Fail loudly on an HTTP error instead of treating an error payload as data
response.raise_for_status()

london_data = response.json()
## NOTE: despite the "temp" in the name (kept so existing references keep working), this is the hourly rain series
london_temp = london_data['hourly']['rain']
## Show a short sample only; more than three years of hourly values is far too long to print
print(f"{len(london_temp)} hourly rain values, first 5: {london_temp[:5]}")

## Save the raw API response as a json file
## (the file names say "temperatures" but hold rain/showers; they are kept unchanged for compatibility)
with open("london_temperatures.json", "w") as file:
    json.dump(london_data, file)

## Convert the hourly series into a csv for convenience and standardization.
## The full response mixes scalar metadata with nested dictionaries, so only the
## 'hourly' section (parallel lists, one entry per hour) is loaded as a table.
df = pd.DataFrame(london_data['hourly'])

df.to_csv('london_temperatures.csv', index=False)

## Read the csv back with pandas to check the formatted dataset
pd.read_csv("london_temperatures.csv")

## code to combine temp 1 2 and 3
## What it does today: writes one {'name', 'url'} record to DATA_FILENAME as JSON.
## NOTE(review): DATA_FILENAME and args are not defined in the visible part of this notebook, and
## the snippet does not combine any datasets yet - confirm where these names come from before running.
with open(DATA_FILENAME, mode='w', encoding='utf-8') as feedsjson:
    entry = {'name': args.name, 'url': args.url}
    json.dump(entry, feedsjson)

In [104]:
def get_lat_lon(country_code, city, filepath='../data/world_cities.csv'):
    """Look up the latitude and longitude of a city in a world-cities CSV file.

    Args:
        country_code: Value matched exactly against the 'country' column (e.g. 'GB').
        city: Value matched exactly against the 'name' column (e.g. 'London').
        filepath: Path of the CSV file to search. It must provide the columns
            'country', 'name', 'lat' and 'lng'. Defaults to the project's
            world_cities.csv, so existing two-argument calls behave as before.

    Returns:
        A (latitude, longitude) tuple taken from the first matching row.

    Raises:
        ValueError: If no row matches both the country code and the city name.
    """
    # Treat only empty cells as missing. pandas' default NA markers include the
    # literal string "NA", which would silently turn a country code such as
    # 'NA' (Namibia) into NaN and make those cities impossible to find.
    world_cities = pd.read_csv(filepath, keep_default_na=False, na_values=[''])

    is_match = (world_cities['country'] == country_code) & (world_cities['name'] == city)
    matches = world_cities[is_match].to_dict('records')

    if not matches:
        raise ValueError(f"No records found for {city}, {country_code} in {filepath}")

    # Several rows may match; the first one wins, as before
    first_match = matches[0]
    return first_match['lat'], first_match['lng']

In [105]:
def build_url(latitude: float, longitude: float, start_date: str, end_date: str):
    """Assemble the Open-Meteo historical-forecast request URL for one location.

    Args:
        latitude: Latitude of the location; converted with str().
        longitude: Longitude of the location; converted with str().
        start_date: First day of the range, as a string (e.g. '2021-10-10').
        end_date: Last day of the range, as a string (e.g. '2023-10-30').

    Returns:
        The full URL, requesting the daily variables rain_sum and showers_sum
        with timezone=auto.
    """
    endpoint = "https://historical-forecast-api.open-meteo.com/v1/forecast?"

    # One "key=value" fragment per query parameter, joined with '&' below
    query_parts = [
        "latitude=" + str(latitude),
        "longitude=" + str(longitude),
        "start_date=" + start_date,
        "end_date=" + end_date,
        "daily=rain_sum,showers_sum",
        "timezone=auto",
    ]

    return endpoint + "&".join(query_parts)

In [106]:
def get_historical_data(country_code, city_name, start_date, end_date):
    """Download the daily rain and shower data for a city from Open-Meteo.

    The city is resolved to coordinates with get_lat_lon, the request URL is
    assembled with build_url, and the decoded JSON response is returned.

    Args:
        country_code: Country code passed to get_lat_lon (e.g. 'GB').
        city_name: City name passed to get_lat_lon (e.g. 'London').
        start_date: First day of the range, as a string (e.g. '2021-10-10').
        end_date: Last day of the range, as a string (e.g. '2023-10-10').

    Returns:
        The API response decoded from JSON.

    Raises:
        ValueError: Propagated from get_lat_lon when the city is not found.
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    latitude, longitude = get_lat_lon(country_code, city_name)
    url = build_url(latitude, longitude, start_date, end_date)

    # The timeout stops a stalled connection from hanging the notebook
    response = requests.get(url, timeout=30)
    # Surface a failed request here rather than handing back an error payload
    # that only breaks later, when the 'daily' key turns out to be missing
    response.raise_for_status()

    return response.json()

In [107]:
# Fetch two years of daily data for London as a sample response to inspect
sample_historical_data = get_historical_data(
    country_code='GB',
    city_name='London',
    start_date='2021-10-10',
    end_date='2023-10-10',
)

In [108]:
# Inspect the response: list its top-level keys first
print("Top-level keys in the response:", sample_historical_data.keys())

# Then report whether the 'daily' section is present and, if so, which series it holds
if 'daily' not in sample_historical_data:
    print("The 'daily' key is missing in the response.")
else:
    print(f"The 'daily' key exists with sub-keys: {sample_historical_data['daily'].keys()}")


Top-level keys in the response: dict_keys(['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'daily_units', 'daily'])
The 'daily' key exists with sub-keys: dict_keys(['time', 'rain_sum', 'showers_sum'])


In [109]:

# Summarise the response: its type, its top-level keys and a three-item sample of each daily series
print(f"The function returned an object of type: {type(sample_historical_data)}")
print(f"This dictionary has the following keys: {sample_historical_data.keys()}")
print("The information I want is under the following keys:")
print(f"  sample_historical_data['daily']['time'] \t\t\t- Sample: {sample_historical_data['daily']['time'][:3]}")
print(f"  sample_historical_data['daily']['rain_sum'] \t- Sample: {sample_historical_data['daily']['rain_sum'][:3]}")
print(f"  sample_historical_data['daily']['showers_sum'] \t- Sample: {sample_historical_data['daily']['showers_sum'][:3]}")

The function returned an object of type: <class 'dict'>
This dictionary has the following keys: dict_keys(['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'daily_units', 'daily'])
The information I want is under the following keys:
  sample_historical_data['daily']['time'] 			- Sample: ['2021-10-10', '2021-10-11', '2021-10-12']
  sample_historical_data['daily']['rain_sum'] 	- Sample: [0.0, 0.0, 0.0]
  sample_historical_data['daily']['showers_sum'] 	- Sample: [0.0, 0.0, 0.0]


In [110]:
# The location analysed in the cells that follow: country code first, then city name
selected_country, selected_city = 'GB', 'London'

In [111]:
# Resolve the selected city to its coordinates
latitude, longitude = get_lat_lon(country_code=selected_country, city=selected_city)

In [112]:
# Report the coordinates that were found for the selected city
print(
    f"The latitude & longitude of {selected_city} ({selected_country}) are: "
    f"({latitude}, {longitude})"
)

The latitude & longitude of London (GB) are: (51.50853, -0.12574)


In [113]:
# Build the request URL for the selected coordinates and display it for a visual check
url = build_url(
    latitude=latitude,
    longitude=longitude,
    start_date='2021-10-10',
    end_date='2023-10-30',
)
url

'https://historical-forecast-api.open-meteo.com/v1/forecast?latitude=51.50853&longitude=-0.12574&start_date=2021-10-10&end_date=2023-10-30&daily=rain_sum,showers_sum&timezone=auto'

In [117]:
# Download the daily series for the selected city (2021-10-10 through 2023-10-10)
json_data = get_historical_data(
    country_code=selected_country,
    city_name=selected_city,
    start_date='2021-10-10',
    end_date='2023-10-10',
)

In [124]:
# Pull the three parallel daily series out of the response
dates, rain_sum, showers_sum = (
    json_data['daily'][series_name] for series_name in ('time', 'rain_sum', 'showers_sum')
)

# 2021-10-10 through 2023-10-10 inclusive is 731 days, so every series should hold 731 entries
{len(dates), len(rain_sum), len(showers_sum)} == {731}

True

In [125]:
# Fetch the daily series here as well, so this cell does not rely on an earlier download
json_data = get_historical_data(
    country_code=selected_country,
    city_name=selected_city,
    start_date='2021-10-10',
    end_date='2023-10-10',
)

# Keep only what the analysis needs: the location labels plus the three daily series
final_data = {
    "country": selected_country,
    "city": selected_city,
    "date": json_data['daily']['time'],
    "rain_sum": json_data['daily']['rain_sum'],
    "showers_sum": json_data['daily']['showers_sum'],
}

# Persist the trimmed-down data as JSON in the data folder
with open('../data/daily_temp.json', mode='w') as file:
    json.dump(final_data, file)

In [126]:
# Read the saved file back in (text read mode is open()'s default)
with open('../data/daily_temp.json') as file:
    data = json.load(file)

# Show which top-level fields came back from the file
data.keys()

dict_keys(['country', 'city', 'date', 'rain_sum', 'showers_sum'])

In [129]:
# Turn the loaded dictionary into a table; the country and city values are repeated on every row
df = pd.DataFrame(data=data)

# Preview the first five rows
df.head(5)

Unnamed: 0,country,city,date,rain_sum,showers_sum
0,GB,London,2021-10-10,0.0,0.0
1,GB,London,2021-10-11,0.0,0.0
2,GB,London,2021-10-12,0.0,0.0
3,GB,London,2021-10-13,0.0,0.0
4,GB,London,2021-10-14,0.0,0.0
