In [7]:
import os
import pandas as pd 
import numpy as np
from datetime import datetime
from functools import reduce
import time
import csv

import json
from pandas.io.json import json_normalize

from pprint import pprint

import sqlite3

In [8]:
def k_to_c(k):
    """Convert a temperature from Kelvin to degrees Celsius."""
    kelvin_offset = 273.15
    return k - kelvin_offset

def calculate_dp(T, H):
    """Approximate the dew point from a temperature and a relative humidity.

    Applies the rule of thumb Td = T - ((100 - RH) / 5), where ``T`` is the
    temperature and ``H`` is the RH term of the equation. The result is on
    the same scale as ``T``.
    """
    humidity_deficit = 100 - H
    return T - (humidity_deficit / 5)

def new_features(df, feature, N):
    """Add a column holding the value of ``feature`` from ``N`` rows earlier.

    The new column is named ``"<feature>_<N>"`` and is added to ``df`` in
    place; nothing is returned. The first ``N`` rows have no prior
    measurement and are filled with NaN so the column keeps the same length
    as the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. Rows are assumed to be in chronological order,
        one row per day -- the shift is purely positional.
    feature : str
        Name of an existing column in ``df``.
    N : int
        Number of rows (days) to look back.
    """
    col_name = "{}_{}".format(feature, N)
    # shift() moves values by position, so it works whatever the index is
    # (date strings, a non-zero-based integer index left behind by dropna,
    # ...) and also when N is larger than the number of rows. Looking values
    # up with df[feature][i - N] is label-based and breaks in those cases.
    df[col_name] = df[feature].shift(N)

In [9]:
def json_to_class(city_):
    """Build the lagged daily feature table for one city and store it in SQLite.

    Steps:

    1. Load the measurements from ``json_files/<city_>_weather.json``.
    2. Convert the temperatures from Kelvin to Celsius (rounded to 2
       decimals) and derive a dew point for the average, maximum and
       minimum temperature of each measurement.
    3. Aggregate per calendar day: mean of the averages, max of the
       maxima, min of the minima.
    4. Add the values of the 1, 2 and 3 previous days for every feature.
    5. Drop the rows with missing values and write the result to the table
       ``<city_>_train_feats`` of ``weather_predict.db``, replacing any
       existing table of that name.

    Parameters
    ----------
    city_ : str
        City name used both in the JSON file name and in the table name.

    Raises
    ------
    FileNotFoundError
        If the JSON file for ``city_`` does not exist.
    KeyError
        If a measurement lacks one of the fields read below.
    ValueError
        If a ``dt_iso`` value does not match the expected timestamp format.
    """
    with open('json_files/' + city_ + '_weather.json') as f:
        measurements = json.load(f)

    columns = ['Date', 'Avg_temp', 'Temp_max', 'Temp_min',
               'Avg_dwp', 'Max_dwp', 'Min_dwp']

    records = []
    for measure in measurements:
        main = measure['main']
        humidity = main['humidity']

        # Convert temperature from Kelvin to Celsius
        avg_c = round(k_to_c(main['temp']), 2)
        max_c = round(k_to_c(main['temp_max']), 2)
        min_c = round(k_to_c(main['temp_min']), 2)

        # Keep only the day of the timestamp, without the time
        timestamp = datetime.strptime(measure['dt_iso'],
                                      '%Y-%m-%d %H:%M:%S +0000 UTC')

        records.append({
            'Date': timestamp.strftime('%Y-%m-%d'),
            'Avg_temp': avg_c,
            'Temp_max': max_c,
            'Temp_min': min_c,
            # Dew point of each temperature, using the same humidity reading
            'Avg_dwp': calculate_dp(avg_c, humidity),
            'Max_dwp': calculate_dp(max_c, humidity),
            'Min_dwp': calculate_dp(min_c, humidity),
        })

    # Passing the column list keeps the expected columns even when the file
    # holds no measurements.
    city_df = pd.DataFrame(records, columns=columns)

    grouped_city = city_df.groupby('Date')
    daily_mean = grouped_city[['Avg_temp', 'Avg_dwp']].mean()
    daily_max = grouped_city[['Temp_max', 'Max_dwp']].max()
    daily_min = grouped_city[['Temp_min', 'Min_dwp']].min()

    dfs = [daily_mean, daily_max, daily_min]
    df_final = reduce(lambda left, right: pd.merge(left, right, on='Date'), dfs)

    features_city = ['Avg_temp', 'Avg_dwp', 'Temp_max', 'Max_dwp', 'Temp_min', 'Min_dwp']
    # N is the number of days prior to the prediction, 3 days for this model
    for feature in features_city:
        for N in range(1, 4):
            new_features(df_final, feature, N)

    # The first days have no complete 3-day history; drop them.
    clean_df = df_final.dropna()

    # Open the database only once there is something to write, and always
    # close the connection, even if the write fails.
    connex = sqlite3.connect("weather_predict.db")
    try:
        clean_df.to_sql(name=city_ + '_train_feats', con=connex,
                        if_exists='replace', index=True)
        connex.commit()
    finally:
        connex.close()

In [10]:
kyoto, manly, nice, salvador, kauai = 'kyoto', 'manly', 'nice', 'salvador', 'kauai'

# Run the JSON -> SQLite feature pipeline for every city, in the same order.
for _city_name in (kyoto, manly, nice, salvador, kauai):
    json_to_class(_city_name)

In [5]:
def k_to_c(k):
    """Convert a temperature from Kelvin to degrees Celsius."""
    kelvin_offset = 273.15
    return k - kelvin_offset

def calculate_dp(T, H):
    """Approximate the dew point from a temperature and a relative humidity.

    Applies the rule of thumb Td = T - ((100 - RH) / 5), where ``T`` is the
    temperature and ``H`` is the RH term of the equation. The result is on
    the same scale as ``T``.
    """
    humidity_deficit = 100 - H
    return T - (humidity_deficit / 5)

def new_features(df, feature, N):
    """Add a column holding the value of ``feature`` from ``N`` rows earlier.

    The new column is named ``"<feature>_<N>"`` and is added to ``df`` in
    place; nothing is returned. The first ``N`` rows have no prior
    measurement and are filled with NaN so the column keeps the same length
    as the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. Rows are assumed to be in chronological order,
        one row per day -- the shift is purely positional.
    feature : str
        Name of an existing column in ``df``.
    N : int
        Number of rows (days) to look back.
    """
    col_name = "{}_{}".format(feature, N)
    # shift() moves values by position, so it works whatever the index is
    # (date strings, a non-zero-based integer index left behind by dropna,
    # ...) and also when N is larger than the number of rows. Looking values
    # up with df[feature][i - N] is label-based and breaks in those cases.
    df[col_name] = df[feature].shift(N)

In [6]:
# Load the recent Kyoto measurements.
city = pd.read_csv('kyoto_recent.csv')

# Keep only the day of each timestamp, without the time, so that the
# measurements can be grouped per calendar day.
city_date = [
    datetime.strptime(day, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')
    for day in city['Date']
]
city['Date'] = city_date

# NOTE(review): 'Unnamed: 0' is presumably the index column written by an
# earlier to_csv call -- confirm. It is dropped if present.
city = city.drop(columns=['Unnamed: 0'], errors='ignore')

# Daily aggregates: mean of the means, max of the maxima, min of the minima.
grouped_city = city.groupby('Date')
city_mean = grouped_city[['Mean_temp', 'Mean_dwp']].mean()
city_max = grouped_city[['Max_temp', 'Max_dwp']].max()
city_min = grouped_city[['Min_temp', 'Min_dwp']].min()

dfs = [city_mean, city_max, city_min]

df_final = reduce(lambda left, right: pd.merge(left, right, on='Date'), dfs)
city_organized = df_final[['Mean_temp', 'Max_temp', 'Min_temp', 'Mean_dwp', 'Max_dwp', 'Min_dwp']]
# Rename the columns that differ; Max_dwp and Min_dwp keep their names.
city_renamed = city_organized.rename(columns={'Mean_temp': 'Avg_temp', 'Max_temp': 'Temp_max',
                                              'Min_temp': 'Temp_min', 'Mean_dwp': 'Avg_dwp'})

new_index = city_renamed.reset_index()
# Append one all-NaN row at the end. DataFrame.append was removed in
# pandas 2.0; reindexing to one extra position adds the same empty row
# without creating (and then deleting) a spurious column named 0.
# NOTE(review): the empty row is presumably the slot for the day to be
# predicted, to be filled with lagged values by a later cell -- confirm.
place_holder_row = new_index.reindex(pd.RangeIndex(len(new_index) + 1))
place_holder_row

FileNotFoundError: File b'kyoto_recent.csv' does not exist

In [None]:
# Snapshot the column names first: new_features adds columns to
# place_holder_row while the loop is running.
features_city = place_holder_row.columns.tolist()

# N is the number of days prior to the prediction, 3 days for this model
for feature in features_city:
    if feature == 'Date':
        continue
    for N in (1, 2, 3):
        new_features(place_holder_row, feature, N)

# Reverse the row order, then renumber the rows; reset_index keeps the
# previous row labels in an 'index' column.
new_index = place_holder_row.sort_index(ascending=False)
most_recent_feat = new_index.reset_index()
most_recent_feat

In [None]:
# Take the row at position 8 of place_holder_row as a Series.
# NOTE(review): the position 8 is hard-coded -- presumably the last
# (placeholder) row of the current data; confirm.
most_recent_feat = place_holder_row.iloc[8]
most_recent_feat

In [None]:
# Scratch cell: evaluates the list expression from the body of new_features
# at top level. NOTE(review): `df` and `rows` are not assigned at top level
# by any earlier visible cell (`rows` is only set in a later cell; `feature`
# and `N` are left over from the loop above) -- this likely fails on a
# fresh kernel; confirm and remove.
numb_days_prior_measurements = [None]*N + [df[feature][i-N] for i in range(N, rows)]

In [None]:
# Scratch cell: displays the list built in the previous cell.
numb_days_prior_measurements

In [None]:
# Scratch cell: the body of new_features run at top level.
# NOTE(review): `df` is not assigned by any visible cell, and `feature` and
# `N` are whatever the last loop left behind -- this likely fails on a
# fresh kernel; confirm and remove.
# total number of rows
rows = df.shape[0]
# a list representing number of days for prior measurements of feature
# notice that the front of the list needs to be padded with N
# None values to maintain the consistent rows length for each N
numb_days_prior_measurements = [None]*N + [df[feature][i-N] for i in range(N, rows)]
# make a new column name of feature_N and add to DataFrame
col_name = "{}_{}".format(feature, N)
df[col_name] = numb_days_prior_measurements