In [None]:
%matplotlib inline
import json
from pandas.io.json import json_normalize
import pandas as pd
import numpy as np
from pprint import pprint
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# function to convert Kelvin to Celsius
def k_to_c (k):
    """Return the temperature `k`, given in Kelvin, in degrees Celsius."""
    kelvin_at_zero_celsius = 273.15
    return k - kelvin_at_zero_celsius

# function to approximate the dew point temperature
#  equation ==> Td = T - ((100 - RH) / 5)

def calculate_dp(T, H):
    """Approximate the dew point from temperature `T` and relative humidity `H`.

    Applies Td = T - ((100 - RH) / 5), where `H` is the relative humidity
    in percent. The result is in the same unit as `T` (the callers in this
    notebook pass Celsius values).
    """
    humidity_deficit = 100 - H
    return T - (humidity_deficit / 5)

# function to create a lagged feature: the value of `feature` N rows earlier
def new_features(merged_df, feature, N):
    """Add a column holding `feature` as it was N rows (days) earlier.

    The new column is named "<feature>_<N>" and is written into `merged_df`
    in place; nothing is returned. The first N rows have no prior
    measurement, so they are filled with missing values (NaN).

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame with one row per time step; rows are assumed to already be in
        chronological order, because the lag is purely positional.
    feature : str
        Name of an existing column in `merged_df`.
    N : int
        Number of rows to look back; must not be negative.

    Raises
    ------
    ValueError
        If `N` is negative (that would leak future values into the row).
    KeyError
        If `feature` is not a column of `merged_df`.
    """
    if N < 0:
        raise ValueError("N must be a non-negative number of rows, got {}".format(N))
    # make a new column name of feature_N and add to DataFrame
    col_name = "{}_{}".format(feature, N)
    # shift() moves values by row position, so the result does not depend on
    # the index labels (date strings, non-contiguous integers, ...). The
    # earlier `series[i - N]` lookup relied on integer keys falling back to
    # positions, which pandas has deprecated and which fails outright on an
    # integer index that does not start at 0.
    # .values bypasses index alignment so the column is assigned row by row.
    merged_df[col_name] = merged_df[feature].shift(N).values


In [3]:
#Kyoto
# Load the raw Kyoto observations (a JSON list of measurement records).
with open('json_files/kyoto_weather.json') as f:
    kyoto = json.load(f)

# Pull the fields of interest out of every record, one list per field.
kyoto_date = [measure['dt_iso'] for measure in kyoto]
kyo_temp = [measure['main']['temp'] for measure in kyoto]
kyo_max = [measure['main']['temp_max'] for measure in kyoto]
kyo_min = [measure['main']['temp_min'] for measure in kyoto]
kyo_humidity = [measure['main']['humidity'] for measure in kyoto]
kyo_pressure = [measure['main']['pressure'] for measure in kyoto]
kyo_wind = [measure['wind']['speed'] for measure in kyoto]
kyo_clouds = [measure['clouds']['all'] for measure in kyoto]
kyo_desc = [measure['weather'][0]['main'] for measure in kyoto]

# Convert temperature from Kelvin to Celsius, rounded to whole degrees
kyo_c = [round(k_to_c(k)) for k in kyo_temp]
kyo_max_c = [round(k_to_c(k)) for k in kyo_max]
kyo_min_c = [round(k_to_c(k)) for k in kyo_min]

# Calculate dew point from each (rounded) temperature and the humidity
kyo_dp = [calculate_dp(T, H) for T, H in zip(kyo_c, kyo_humidity)]
kyo_max_dp = [calculate_dp(T, H) for T, H in zip(kyo_max_c, kyo_humidity)]
kyo_min_dp = [calculate_dp(T, H) for T, H in zip(kyo_min_c, kyo_humidity)]

# convert date to show only day without time
kyo_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S +0000 UTC').strftime('%Y-%m-%d')
    for day in kyoto_date
]

# Create dict to hold all key, values
kyoto_dict = {
    "Date": kyo_date,
    "Kyo_temp": kyo_c,
    "Kyo_max": kyo_max_c,
    "Kyo_min": kyo_min_c,
    "Kyo_dwp": kyo_dp,
    "Kyo_mx_dwp": kyo_max_dp,
    "Kyo_mi_dwp": kyo_min_dp,
    "Kyo_pressure": kyo_pressure,
    "Kyo_humidity": kyo_humidity,
    "Kyo_wind": kyo_wind,
    "Kyo_clouds": kyo_clouds,
    "Kyo_desc": kyo_desc
}

kyoto_df = pd.DataFrame(kyoto_dict)
# One row of daily means per date. Kyo_desc is text, so restrict the mean to
# numeric columns: pandas >= 2.0 raises a TypeError here instead of silently
# dropping the non-numeric column as older versions did.
kyoto_mean = kyoto_df.groupby("Date").mean(numeric_only=True)

In [64]:
#MANLY
# Load the raw Manly observations (a JSON list of measurement records).
with open('json_files/manly_weather.json') as f:
    manly = json.load(f)

# Pull the fields of interest out of every record, one list per field.
manly_date = [measure['dt_iso'] for measure in manly]
man_temp = [measure['main']['temp'] for measure in manly]
man_max = [measure['main']['temp_max'] for measure in manly]
man_min = [measure['main']['temp_min'] for measure in manly]
man_humidity = [measure['main']['humidity'] for measure in manly]
man_pressure = [measure['main']['pressure'] for measure in manly]
man_wind = [measure['wind']['speed'] for measure in manly]
man_clouds = [measure['clouds']['all'] for measure in manly]
man_desc = [measure['weather'][0]['main'] for measure in manly]

# Convert temperature from Kelvin to Celsius, rounded to whole degrees
man_c = [round(k_to_c(k)) for k in man_temp]
man_max_c = [round(k_to_c(k)) for k in man_max]
man_min_c = [round(k_to_c(k)) for k in man_min]

# Calculate dew point from each (rounded) temperature and the humidity
man_dp = [calculate_dp(T, H) for T, H in zip(man_c, man_humidity)]
man_max_dp = [calculate_dp(T, H) for T, H in zip(man_max_c, man_humidity)]
man_min_dp = [calculate_dp(T, H) for T, H in zip(man_min_c, man_humidity)]

# convert datetime to show only date without the time
man_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S +0000 UTC').strftime('%Y-%m-%d')
    for day in manly_date
]

# Create dict to hold desired key,values
manly_dict = {
    "Date": man_date,
    "Man_temp": man_c,
    "Man_max": man_max_c,
    "Man_min": man_min_c,
    "Man_dwp": man_dp,
    "Man_mx_dwp": man_max_dp,
    "Man_mi_dwp": man_min_dp,
    "Man_pressure": man_pressure,
    "Man_humidity": man_humidity,
    "Man_wind": man_wind,
    "Man_clouds": man_clouds,
    "Man_desc": man_desc
}

manly_df = pd.DataFrame(manly_dict)
# One row of daily means per date. Man_desc is text, so restrict the mean to
# numeric columns: pandas >= 2.0 raises a TypeError here instead of silently
# dropping the non-numeric column as older versions did.
manly_mean = manly_df.groupby("Date").mean(numeric_only=True)

In [5]:
#manly_mean.columns

In [65]:
features_manly = ['Man_temp', 'Man_max', 'Man_min', 'Man_dwp', 'Man_mx_dwp', 'Man_mi_dwp']
# For every base column add its value from each of the previous 3 days
# (N = 1, 2, 3). Columns are appended feature by feature, then by N.
for feature in (name for name in features_manly if name != 'Date'):
    for N in (1, 2, 3):
        new_features(manly_mean, feature, N)

In [66]:
# Remove the first three days: their lag columns have no prior measurements.
manly_mean = manly_mean.drop(labels=['2017-01-01', '2017-01-02', '2017-01-03'], axis=0)

In [67]:
# Persist the Manly daily means plus lag features; the Date index is written
# as the first CSV column.
manly_mean.to_csv('features_csv/manly_features.csv')

In [9]:
# Load the previously trained model from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load model files that come from a trusted source.
import pickle
with open('linear_temp_model.pkl', 'rb') as file:
    model = pickle.load(file)

In [10]:
#manly_mean.columns

In [11]:
# set X and y values
# Predictors are the 1-, 2- and 3-day lags of each temperature / dew point
# column, ordered feature by feature.
# NOTE(review): the pickled model presumably expects exactly this column
# order -- confirm against the notebook that trained it.
predictors = [
    "{}_{}".format(base, lag)
    for base in ('Man_temp', 'Man_max', 'Man_min',
                 'Man_dwp', 'Man_mx_dwp', 'Man_mi_dwp')
    for lag in (1, 2, 3)
]
X = manly_mean[predictors]
y = manly_mean['Man_temp']
model.score(X,y)

0.7610282727224129

In [12]:
# Side-by-side preview of the observed daily mean temperature and the model's
# prediction for the first rows.
pd.DataFrame({"True Temperatures": y, "Predicted Temperatures": model.predict(X)}).head()

Unnamed: 0_level_0,True Temperatures,Predicted Temperatures
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-04,22.041667,21.983582
2017-01-05,22.041667,21.701404
2017-01-06,22.333333,21.807705
2017-01-07,24.208333,21.836123
2017-01-08,25.25,23.480025


In [13]:
#merged_df = pd.merge(kyoto_mean, manly_mean, on =['Date'])
#merged_df.columns

In [14]:
#features = ['Kyo_temp', 'Kyo_max', 'Kyo_min', 'Kyo_dwp', 'Kyo_mx_dwp', 'Kyo_mi_dwp',
       #'Kyo_pressure', 'Kyo_humidity', 'Kyo_wind', 'Kyo_clouds', 'Man_temp',
       #'Man_max', 'Man_min', 'Man_dwp', 'Man_mx_dwp', 'Man_mi_dwp',
       #'Man_pressure', 'Man_humidity', 'Man_wind', 'Man_clouds']

In [15]:
#N is the number of days prior to the prediction, 3 days for this model
#for feature in features_manly:  
    #if feature != 'Date':
        #for N in range(1, 4):
            #new_features(merged_df, feature, N)

In [16]:
#merged_df.columns

In [17]:
#merged_df.info()

In [18]:
#merged_df.head(10)

In [19]:
# drop first 3 row with missing values when we added new features
#merged_df = merged_df.drop(['2017-01-01', '2017-01-02','2017-01-03'])

In [20]:
#merged_df.info()

In [21]:
#merged_df.to_csv('merged_new_features.csv')

In [None]:
#df.to_csv('weather_combined.csv', index=False)

In [22]:
# NICE
# Read the raw Nice observations (a JSON list of measurement records).
with open('json_files/nice_weather.json') as f:
    nice = json.load(f)

# One list per field of interest, in record order.
nice_date = [measure['dt_iso'] for measure in nice]
nice_temp = [measure['main']['temp'] for measure in nice]
nice_max = [measure['main']['temp_max'] for measure in nice]
nice_min = [measure['main']['temp_min'] for measure in nice]
nice_humidity = [measure['main']['humidity'] for measure in nice]
nice_pressure = [measure['main']['pressure'] for measure in nice]
nice_wind = [measure['wind']['speed'] for measure in nice]
nice_clouds = [measure['clouds']['all'] for measure in nice]
nice_desc = [measure['weather'][0]['main'] for measure in nice]

# Kelvin -> Celsius, rounded to whole degrees
nice_c = [round(k_to_c(k)) for k in nice_temp]
nice_max_c = [round(k_to_c(k)) for k in nice_max]
nice_min_c = [round(k_to_c(k)) for k in nice_min]

# Dew point from each (rounded) temperature and the humidity
nice_dp = [calculate_dp(T, H) for T, H in zip(nice_c, nice_humidity)]
nice_max_dp = [calculate_dp(T, H) for T, H in zip(nice_max_c, nice_humidity)]
nice_min_dp = [calculate_dp(T, H) for T, H in zip(nice_min_c, nice_humidity)]

# Keep only the calendar date of each timestamp
nic_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S +0000 UTC').strftime('%Y-%m-%d')
    for day in nice_date
]

# Column name -> values, ready to be turned into a DataFrame
nice_dict = {
    "Date": nic_date,
    "Nice_temp": nice_c,
    "Nice_max": nice_max_c,
    "Nice_min": nice_min_c,
    "Nice_dwp": nice_dp,
    "Nice_mx_dwp": nice_max_dp,
    "Nice_mi_dwp": nice_min_dp,
    "Nice_pressure": nice_pressure,
    "Nice_humidity": nice_humidity,
    "Nice_wind": nice_wind,
    "Nice_clouds": nice_clouds,
    "Nice_desc": nice_desc
}


In [23]:
nice_df = pd.DataFrame(nice_dict)
# One row of daily means per date. Nice_desc is text, so restrict the mean to
# numeric columns: pandas >= 2.0 raises a TypeError here instead of silently
# dropping the non-numeric column as older versions did.
nice_grouped = nice_df.groupby('Date').mean(numeric_only=True)
nice_grouped.columns

Index(['Nice_temp', 'Nice_max', 'Nice_min', 'Nice_dwp', 'Nice_mx_dwp',
       'Nice_mi_dwp', 'Nice_pressure', 'Nice_humidity', 'Nice_wind',
       'Nice_clouds'],
      dtype='object')

In [24]:
# Base columns of nice_grouped that get 1-3 day lag features.
features_nice = [
    'Nice_temp',
    'Nice_max',
    'Nice_min',
    'Nice_dwp',
    'Nice_mx_dwp',
    'Nice_mi_dwp',
    'Nice_pressure',
    'Nice_humidity',
    'Nice_wind',
    'Nice_clouds',
]

In [25]:
# For every base column add its value from each of the previous 3 days
# (N = 1, 2, 3). Columns are appended feature by feature, then by N.
for feature in (name for name in features_nice if name != 'Date'):
    for N in (1, 2, 3):
        new_features(nice_grouped, feature, N)

In [26]:
# Preview the first rows; the lag columns are empty where no earlier day exists.
nice_grouped.head()

Unnamed: 0_level_0,Nice_temp,Nice_max,Nice_min,Nice_dwp,Nice_mx_dwp,Nice_mi_dwp,Nice_pressure,Nice_humidity,Nice_wind,Nice_clouds,...,Nice_pressure_3,Nice_humidity_1,Nice_humidity_2,Nice_humidity_3,Nice_wind_1,Nice_wind_2,Nice_wind_3,Nice_clouds_1,Nice_clouds_2,Nice_clouds_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,9.375,10.708333,6.583333,2.616667,3.95,-0.175,1023.708333,66.208333,2.083333,6.666667,...,,,,,,,,,,
2017-01-02,10.625,12.083333,8.916667,4.316667,5.775,2.608333,1019.291667,68.458333,2.75,7.291667,...,,66.208333,,,2.083333,,,6.666667,,
2017-01-03,9.333333,10.416667,8.0,2.808333,3.891667,1.475,1017.083333,67.375,2.0,5.625,...,,68.458333,66.208333,,2.75,2.083333,,7.291667,6.666667,
2017-01-04,7.958333,9.416667,5.583333,1.025,2.483333,-1.35,1013.75,65.333333,1.583333,0.0,...,1023.708333,67.375,68.458333,66.208333,2.0,2.75,2.083333,5.625,7.291667,6.666667
2017-01-05,7.0,8.125,5.291667,-0.633333,0.491667,-2.341667,1014.458333,61.833333,1.416667,27.291667,...,1019.291667,65.333333,67.375,68.458333,1.583333,2.0,2.75,0.0,5.625,7.291667


In [27]:
# Remove the first three days: their lag columns have no prior measurements.
nice_grouped = nice_grouped.drop(labels=['2017-01-01', '2017-01-02', '2017-01-03'], axis=0)


In [28]:
# Preview again after dropping the first three days.
nice_grouped.head()

Unnamed: 0_level_0,Nice_temp,Nice_max,Nice_min,Nice_dwp,Nice_mx_dwp,Nice_mi_dwp,Nice_pressure,Nice_humidity,Nice_wind,Nice_clouds,...,Nice_pressure_3,Nice_humidity_1,Nice_humidity_2,Nice_humidity_3,Nice_wind_1,Nice_wind_2,Nice_wind_3,Nice_clouds_1,Nice_clouds_2,Nice_clouds_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-04,7.958333,9.416667,5.583333,1.025,2.483333,-1.35,1013.75,65.333333,1.583333,0.0,...,1023.708333,67.375,68.458333,66.208333,2.0,2.75,2.083333,5.625,7.291667,6.666667
2017-01-05,7.0,8.125,5.291667,-0.633333,0.491667,-2.341667,1014.458333,61.833333,1.416667,27.291667,...,1019.291667,65.333333,67.375,68.458333,1.583333,2.0,2.75,0.0,5.625,7.291667
2017-01-06,4.708333,5.458333,3.708333,-3.241667,-2.491667,-4.241667,1025.75,60.25,1.666667,55.0,...,1017.083333,61.833333,65.333333,67.375,1.416667,1.583333,2.0,27.291667,0.0,5.625
2017-01-07,4.875,6.0,3.291667,-0.8,0.325,-2.383333,1026.791667,71.625,1.791667,44.583333,...,1013.75,60.25,61.833333,65.333333,1.666667,1.416667,1.583333,55.0,27.291667,0.0
2017-01-08,5.833333,7.458333,3.25,1.15,2.775,-1.433333,1021.375,76.583333,1.458333,27.708333,...,1014.458333,71.625,60.25,61.833333,1.791667,1.666667,1.416667,44.583333,55.0,27.291667


In [68]:
# Persist the Nice daily means plus lag features; the Date index is written
# as the first CSV column.
nice_grouped.to_csv('features_csv/nice_features.csv')

In [69]:
# Load the previously trained model from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load model files that come from a trusted source.
import pickle
with open('linear_temp_model.pkl', 'rb') as file:
    model = pickle.load(file)

In [70]:
# set X and y values
# Predictors are the 1-, 2- and 3-day lags of each temperature / dew point
# column, ordered feature by feature.
# NOTE(review): the pickled model presumably expects exactly this column
# order -- confirm against the notebook that trained it.
predictors = [
    "{}_{}".format(base, lag)
    for base in ('Nice_temp', 'Nice_max', 'Nice_min',
                 'Nice_dwp', 'Nice_mx_dwp', 'Nice_mi_dwp')
    for lag in (1, 2, 3)
]
X = nice_grouped[predictors]
y = nice_grouped['Nice_temp']
model.score(X,y)

0.9465157252738517

In [71]:
# Side-by-side preview of the observed daily mean temperature and the model's
# prediction for the first rows.
pd.DataFrame({"True Temperatures": y, "Predicted Temperatures": model.predict(X)}).head()

Unnamed: 0_level_0,True Temperatures,Predicted Temperatures
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-04,7.958333,9.680399
2017-01-05,7.0,8.760713
2017-01-06,4.708333,7.847853
2017-01-07,4.875,5.786593
2017-01-08,5.833333,6.15761


In [57]:
#Salvador
# Load the raw Salvador observations (a JSON list of measurement records).
with open('json_files/salvador.json') as f:
    salvador = json.load(f)

# Pull the fields of interest out of every record, one list per field.
sal_date = [measure['dt_iso'] for measure in salvador]
sal_temp = [measure['main']['temp'] for measure in salvador]
sal_max = [measure['main']['temp_max'] for measure in salvador]
sal_min = [measure['main']['temp_min'] for measure in salvador]
sal_humidity = [measure['main']['humidity'] for measure in salvador]
sal_pressure = [measure['main']['pressure'] for measure in salvador]
sal_wind = [measure['wind']['speed'] for measure in salvador]
sal_clouds = [measure['clouds']['all'] for measure in salvador]
sal_desc = [measure['weather'][0]['main'] for measure in salvador]

# Convert temperature from Kelvin to Celsius, rounded to whole degrees
sal_c = [round(k_to_c(k)) for k in sal_temp]
sal_max_c = [round(k_to_c(k)) for k in sal_max]
sal_min_c = [round(k_to_c(k)) for k in sal_min]

# Calculate dew point from each (rounded) temperature and the humidity
sal_dp = [calculate_dp(T, H) for T, H in zip(sal_c, sal_humidity)]
sal_max_dp = [calculate_dp(T, H) for T, H in zip(sal_max_c, sal_humidity)]
sal_min_dp = [calculate_dp(T, H) for T, H in zip(sal_min_c, sal_humidity)]

# convert date to show only day without time
salv_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S +0000 UTC').strftime('%Y-%m-%d')
    for day in sal_date
]

# Create dict to hold all key, values
salvador_dict = {
    "Date": salv_date,
    "Sal_temp": sal_c,
    "Sal_max": sal_max_c,
    "Sal_min": sal_min_c,
    "Sal_dwp": sal_dp,
    "Sal_mx_dwp": sal_max_dp,
    "Sal_mi_dwp": sal_min_dp,
    "Sal_pressure": sal_pressure,
    "Sal_humidity": sal_humidity,
    "Sal_wind": sal_wind,
    "Sal_clouds": sal_clouds,
    "Sal_desc": sal_desc
}

salvador_df = pd.DataFrame(salvador_dict)
# One row of daily means per date. Sal_desc is text, so restrict the mean to
# numeric columns: pandas >= 2.0 raises a TypeError here instead of silently
# dropping the non-numeric column as older versions did.
salvador_mean = salvador_df.groupby("Date").mean(numeric_only=True)
salvador_mean.head()



Unnamed: 0_level_0,Sal_temp,Sal_max,Sal_min,Sal_dwp,Sal_mx_dwp,Sal_mi_dwp,Sal_pressure,Sal_humidity,Sal_wind,Sal_clouds
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-01,27.041667,27.041667,27.041667,22.266667,22.266667,22.266667,1015.333333,76.125,4.083333,40.0
2017-01-02,26.875,26.875,26.875,21.683333,21.683333,21.683333,1012.916667,74.041667,4.0,25.833333
2017-01-03,27.333333,27.333333,27.333333,21.708333,21.708333,21.708333,1012.333333,71.875,4.208333,26.666667
2017-01-04,27.416667,27.416667,27.416667,21.591667,21.591667,21.591667,1013.916667,70.875,5.291667,24.166667
2017-01-05,27.666667,27.666667,27.666667,24.266667,24.266667,24.266667,1014.958333,83.0,5.041667,37.5


In [58]:
features_sal = [
    'Sal_temp', 'Sal_max', 'Sal_min',
    'Sal_dwp', 'Sal_mx_dwp', 'Sal_mi_dwp',
    'Sal_pressure', 'Sal_humidity', 'Sal_wind', 'Sal_clouds',
]
# For every base column add its value from each of the previous 3 days
# (N = 1, 2, 3). Columns are appended feature by feature, then by N.
for feature in (name for name in features_sal if name != 'Date'):
    for N in (1, 2, 3):
        new_features(salvador_mean, feature, N)

In [59]:
# Preview the first rows; the lag columns are empty where no earlier day exists.
salvador_mean.head()

Unnamed: 0_level_0,Sal_temp,Sal_max,Sal_min,Sal_dwp,Sal_mx_dwp,Sal_mi_dwp,Sal_pressure,Sal_humidity,Sal_wind,Sal_clouds,...,Sal_pressure_3,Sal_humidity_1,Sal_humidity_2,Sal_humidity_3,Sal_wind_1,Sal_wind_2,Sal_wind_3,Sal_clouds_1,Sal_clouds_2,Sal_clouds_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,27.041667,27.041667,27.041667,22.266667,22.266667,22.266667,1015.333333,76.125,4.083333,40.0,...,,,,,,,,,,
2017-01-02,26.875,26.875,26.875,21.683333,21.683333,21.683333,1012.916667,74.041667,4.0,25.833333,...,,76.125,,,4.083333,,,40.0,,
2017-01-03,27.333333,27.333333,27.333333,21.708333,21.708333,21.708333,1012.333333,71.875,4.208333,26.666667,...,,74.041667,76.125,,4.0,4.083333,,25.833333,40.0,
2017-01-04,27.416667,27.416667,27.416667,21.591667,21.591667,21.591667,1013.916667,70.875,5.291667,24.166667,...,1015.333333,71.875,74.041667,76.125,4.208333,4.0,4.083333,26.666667,25.833333,40.0
2017-01-05,27.666667,27.666667,27.666667,24.266667,24.266667,24.266667,1014.958333,83.0,5.041667,37.5,...,1012.916667,70.875,71.875,74.041667,5.291667,4.208333,4.0,24.166667,26.666667,25.833333


In [60]:
# Remove the first three days: their lag columns have no prior measurements.
salvador_mean = salvador_mean.drop(labels=['2017-01-01', '2017-01-02', '2017-01-03'], axis=0)


In [62]:
# Persist the Salvador daily means plus lag features; the Date index is
# written as the first CSV column.
salvador_mean.to_csv('features_csv/salvador_features.csv')

In [52]:
# Load the previously trained model from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load model files that come from a trusted source.
import pickle
with open('linear_temp_model.pkl', 'rb') as file:
    model = pickle.load(file)

In [53]:
# set X and y values
# Predictors are the 1-, 2- and 3-day lags of each temperature / dew point
# column, ordered feature by feature.
# NOTE(review): the pickled model presumably expects exactly this column
# order -- confirm against the notebook that trained it.
predictors = [
    "{}_{}".format(base, lag)
    for base in ('Sal_temp', 'Sal_max', 'Sal_min',
                 'Sal_dwp', 'Sal_mx_dwp', 'Sal_mi_dwp')
    for lag in (1, 2, 3)
]

X = salvador_mean[predictors]
y = salvador_mean['Sal_temp']
model.score(X,y)

0.709061710289454

In [None]:
# Kauai
# Load the raw Kauai observations (a JSON list of measurement records).
with open('json_files/kauai.json') as f:
    kauai = json.load(f)

# Pull the fields of interest out of every Kauai record, one list per field.
# (This cell previously iterated over the Salvador records by mistake.)
kau_date = [measure['dt_iso'] for measure in kauai]
kau_temp = [measure['main']['temp'] for measure in kauai]
kau_max = [measure['main']['temp_max'] for measure in kauai]
kau_min = [measure['main']['temp_min'] for measure in kauai]
kau_humidity = [measure['main']['humidity'] for measure in kauai]
kau_pressure = [measure['main']['pressure'] for measure in kauai]
kau_wind = [measure['wind']['speed'] for measure in kauai]
kau_clouds = [measure['clouds']['all'] for measure in kauai]
kau_desc = [measure['weather'][0]['main'] for measure in kauai]

# Convert temperature from Kelvin to Celsius, rounded to whole degrees.
# Uses the Kauai lists, not the sal_* lists left over from the Salvador cell.
kau_c = [round(k_to_c(k)) for k in kau_temp]
kau_max_c = [round(k_to_c(k)) for k in kau_max]
kau_min_c = [round(k_to_c(k)) for k in kau_min]

# Calculate dew point from each (rounded) temperature and the humidity.
# kau_dp is defined here; the old code reset sal_dp instead, which clobbered
# Salvador's list and left kau_dp undefined.
kau_dp = [calculate_dp(T, H) for T, H in zip(kau_c, kau_humidity)]
kau_max_dp = [calculate_dp(T, H) for T, H in zip(kau_max_c, kau_humidity)]
kau_min_dp = [calculate_dp(T, H) for T, H in zip(kau_min_c, kau_humidity)]

# convert date to show only day without time
kauai_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S +0000 UTC').strftime('%Y-%m-%d')
    for day in kau_date
]

# Create dict to hold all key, values (columns prefixed Kau_, matching the
# per-city prefix convention used for the other cities)
kauai_dict = {
    "Date": kauai_date,
    "Kau_temp": kau_c,
    "Kau_max": kau_max_c,
    "Kau_min": kau_min_c,
    "Kau_dwp": kau_dp,
    "Kau_mx_dwp": kau_max_dp,
    "Kau_mi_dwp": kau_min_dp,
    "Kau_pressure": kau_pressure,
    "Kau_humidity": kau_humidity,
    "Kau_wind": kau_wind,
    "Kau_clouds": kau_clouds,
    "Kau_desc": kau_desc
}

kauai_df = pd.DataFrame(kauai_dict)
# One row of daily means per date. Kau_desc is text, so restrict the mean to
# numeric columns: pandas >= 2.0 raises a TypeError here instead of silently
# dropping the non-numeric column as older versions did.
kauai_mean = kauai_df.groupby("Date").mean(numeric_only=True)
kauai_mean.head()

In [None]:
# Load the Kyoto weather table (with its lag columns) from CSV.
class_df = pd.read_csv('csv_files/kyoto_weather.csv')


In [89]:
# Preview the first rows of the loaded table.
class_df.head()

Unnamed: 0,Date,Mean_temp,Max_temp,Min_temp,Mean_dwp,Max_dwp,Min_dwp,Pressure,Humidity,Wind,...,Pressure_3,Humidity_1,Humidity_2,Humidity_3,Wind_1,Wind_2,Wind_3,Clouds_1,Clouds_2,Clouds_3
0,2017-01-01,6,7,4,3.6,4.6,1.6,1028,88,2,...,,,,,,,,,,
1,2017-01-01,7,9,5,4.6,6.6,2.6,1028,88,1,...,,88.0,,,2.0,,,75.0,,
2,2017-01-01,9,10,7,2.0,3.0,0.0,1028,65,1,...,,88.0,88.0,,1.0,2.0,,75.0,75.0,
3,2017-01-01,11,11,10,0.2,0.2,-0.8,1027,46,1,...,1028.0,65.0,88.0,88.0,1.0,1.0,2.0,40.0,75.0,75.0
4,2017-01-01,12,12,11,0.6,0.6,-0.4,1026,43,1,...,1028.0,46.0,65.0,88.0,1.0,1.0,1.0,20.0,40.0,75.0


In [90]:
# Check dtypes and non-null counts to see which columns contain missing values.
class_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17443 entries, 0 to 17442
Data columns (total 42 columns):
Date           17443 non-null object
Mean_temp      17443 non-null int64
Max_temp       17443 non-null int64
Min_temp       17443 non-null int64
Mean_dwp       17443 non-null float64
Max_dwp        17443 non-null float64
Min_dwp        17443 non-null float64
Pressure       17443 non-null int64
Humidity       17443 non-null int64
Wind           17443 non-null int64
Clouds         17443 non-null int64
Description    17443 non-null object
Mean_temp_1    17442 non-null float64
Mean_temp_2    17441 non-null float64
Mean_temp_3    17440 non-null float64
Max_temp_1     17442 non-null float64
Max_temp_2     17441 non-null float64
Max_temp_3     17440 non-null float64
Min_temp_1     17442 non-null float64
Min_temp_2     17441 non-null float64
Min_temp_3     17440 non-null float64
Mean_dwp_1     17442 non-null float64
Mean_dwp_2     17441 non-null float64
Mean_dwp_3     17440 non-null flo

In [91]:
# Drop every row that has at least one missing value; class_df is rebound to
# the filtered frame, which keeps its original index labels.
class_df = class_df.dropna()

In [92]:
# Re-check non-null counts after dropping the incomplete rows.
class_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17440 entries, 3 to 17442
Data columns (total 42 columns):
Date           17440 non-null object
Mean_temp      17440 non-null int64
Max_temp       17440 non-null int64
Min_temp       17440 non-null int64
Mean_dwp       17440 non-null float64
Max_dwp        17440 non-null float64
Min_dwp        17440 non-null float64
Pressure       17440 non-null int64
Humidity       17440 non-null int64
Wind           17440 non-null int64
Clouds         17440 non-null int64
Description    17440 non-null object
Mean_temp_1    17440 non-null float64
Mean_temp_2    17440 non-null float64
Mean_temp_3    17440 non-null float64
Max_temp_1     17440 non-null float64
Max_temp_2     17440 non-null float64
Max_temp_3     17440 non-null float64
Min_temp_1     17440 non-null float64
Min_temp_2     17440 non-null float64
Min_temp_3     17440 non-null float64
Mean_dwp_1     17440 non-null float64
Mean_dwp_2     17440 non-null float64
Mean_dwp_3     17440 non-null flo

In [93]:
# Save the cleaned table; index=False leaves the row index out of the CSV.
class_df.to_csv('csv_files/class_df.csv', index=False)