# Get 10 day Weather forecast 
- Jim Haskin

- GA-Data Science
- Dec 2015

- 2/17/2016

## Method
This 10 day forecast will be used to run through the predictive model to get the projected crime level for the next 10 days
- Use the Weather Underground API to get their forecast.
- Use the OpenWeatherMap API and the pyOWM wrapper to get their prediction for fields that were not in the Weather Underground API
- Calculate additional fields that were also calculated for the Training data.
- Write to file to be run with model

## Working notes
- TEMP : added both status fields. Need to see what works best with the historical 'events'
- additional features still to add (See Below)

In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
%matplotlib inline
import pyowm
from datetime import timedelta
from sf_sun_moon import sf_sun_moon

In [2]:
import requests
# use sleep to stop us from getting timed out
from time import sleep

# Weather Underground json

### Use json and request to pull in a 10 day forcast from Weather Underground

In [3]:
api_key = '3bfa0f909a078622'
#api_key = 3bfa0f909a078622

In [4]:
# use requests library to interact with a URL
r = requests.get('http://api.wunderground.com/api/3bfa0f909a078622/forecast10day/q/CA/San_Francisco.json')

In [5]:
# check the status: 200 means success, 4xx means error
r.status_code

200

In [6]:
data = r.json()['forecast']['simpleforecast']['forecastday']

In [7]:
#data[0]

## Create a data frame from the 10 day forcasts
Fields
- date
- day of week
- day
- month
- year
- max temp
- mean temperature
- min temp
- temperature change
- mean humidity
- max wind speed
- mean wind speed
- precipitation
- conditions/events
- mean wind direction
- Sun minutes
- Sun units
- Moon phase


In [8]:
rows = []
for day in data:
    ddd = []
    dayr = day['date']['day']
    monthr = day['date']['month']
    yearr = day['date']['year']
    ddd.append(str(yearr)+'-'+str(monthr)+'-'+str(dayr))
    ddd.append(day['date']['weekday'])    
    ddd.append(dayr)    
    ddd.append(monthr)    
    ddd.append(yearr)
    temp_h = int(day['high']['fahrenheit'])
    temp_l = int(day['low']['fahrenheit'])
    temp_d = temp_h - temp_l
    temp_m = (temp_h + temp_l) / 2
    
    ddd.append(temp_h)
    ddd.append(temp_m)
    ddd.append(temp_l)
    ddd.append(temp_d)
#    ddd.append(day['maxhumidity'])
    ddd.append(day['avehumidity'])
#    ddd.append(day['minhumidity'])
    ddd.append(day['maxwind']['mph'])
    ddd.append(day['avewind']['mph'])
    ddd.append(day['qpf_allday']['in'])
    ddd.append(day['conditions'])
    ddd.append(day['avewind']['degrees'])
    rt = pd.Series(ddd)
    rows.append(rt)
colnames = ['date', 'dayofweek', 'day', 'month', 'year', 
            'temp_max', 'temp_mean', 'temp_min', 'temp_delta', 'humidity_mean',
            'wind_speed_max', 'wind_speed_mean',
            'percipitation', 'events', 'wind_direction']
   
wu_forecast = pd.DataFrame(rows)
wu_forecast.columns = colnames

# additional fields that can be used    
#    ddd.append(day['icon'])
#    ddd.append(day['avewind']['dir'])
#    ddd.append(day['maxwind']['degrees'])
#    ddd.append(day['maxwind']['dir'])
#    ddd.append(day['avehumidity'])
#    ddd.append(day['qpf_day']['in'])
#    ddd.append(day['qpf_night']['in'])
#    ddd.append(day['skyicon'])
#    ddd.append(day['period'])
#    ddd.append(day['pop'])


    

### Clean features

In [9]:
#wu_forecast['temp_max'] = wu_forecast['temp_max'].astype(int)
#wu_forecast['temp_min'] = wu_forecast['temp_min'].astype(int)
wu_forecast.head()



Unnamed: 0,date,dayofweek,day,month,year,temp_max,temp_mean,temp_min,temp_delta,humidity_mean,wind_speed_max,wind_speed_mean,percipitation,events,wind_direction
0,2016-2-17,Wednesday,17,2,2016,65,58,52,13,69,30,24,0.51,Rain,188
1,2016-2-18,Thursday,18,2,2016,59,55,52,7,67,20,15,0.01,Chance of a Thunderstorm,226
2,2016-2-19,Friday,19,2,2016,60,55,50,10,73,20,13,0.06,Chance of Rain,197
3,2016-2-20,Saturday,20,2,2016,62,56,51,11,61,10,6,0.0,Partly Cloudy,336
4,2016-2-21,Sunday,21,2,2016,67,59,52,15,62,10,7,0.0,Clear,269


In [10]:
wu_forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Data columns (total 15 columns):
date               10 non-null object
dayofweek          10 non-null object
day                10 non-null int64
month              10 non-null int64
year               10 non-null int64
temp_max           10 non-null int64
temp_mean          10 non-null int64
temp_min           10 non-null int64
temp_delta         10 non-null int64
humidity_mean      10 non-null int64
wind_speed_max     10 non-null int64
wind_speed_mean    10 non-null int64
percipitation      10 non-null float64
events             10 non-null object
wind_direction     10 non-null int64
dtypes: float64(1), int64(11), object(3)
memory usage: 1.2+ KB


In [11]:
wu_forecast.describe()

Unnamed: 0,day,month,year,temp_max,temp_mean,temp_min,temp_delta,humidity_mean,wind_speed_max,wind_speed_mean,percipitation,wind_direction
count,10.0,10,10,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,21.5,2,2016,65.9,59.0,52.6,13.3,64.7,14.0,10.2,0.062,220.8
std,3.02765,0,0,4.280446,2.867442,1.505545,3.198958,4.738729,6.992059,5.711587,0.158521,84.644091
min,17.0,2,2016,59.0,55.0,50.0,7.0,56.0,10.0,6.0,0.0,32.0
25%,19.25,2,2016,62.75,56.5,52.0,11.5,62.0,10.0,7.0,0.0,190.25
50%,21.5,2,2016,67.0,59.5,52.5,14.0,65.0,10.0,7.5,0.005,227.0
75%,23.75,2,2016,69.5,61.75,53.75,15.0,67.0,17.5,12.0,0.02,266.75
max,26.0,2,2016,71.0,62.0,55.0,18.0,73.0,30.0,24.0,0.51,336.0


# OWM - OpenWeatherMap


### Use pyown wrapper to pull in a 10 day forecast from OWM


In [12]:
owm = pyowm.OWM('76ca531bdb080cdf04be66ddc063d249')
city = "SanFrancisco,usa"
fc = owm.daily_forecast(city, limit=10)
f = fc.get_forecast()
forecasts = f.get_weathers()

## Create a data frame from the 10 day forecasts
Fields
- mean pressure (in mb, convert to in.)
- cloud cover (in percent, convert to 1 - 8 scale)

In [13]:
rows = []
for day in forecasts:
    ddd = []
    ddd.append(day.get_reference_time('iso'))
    ddd.append(day.get_pressure()['press']*0.02953)
    ddd.append(day.get_clouds())
    ddd.append(day.get_status())
    rt = pd.Series(ddd)
    rows.append(rt)
colnames = ['date', 'sl_pressure_mean', 'cloud_cover', 'owm_status'] 
owm_forecast = pd.DataFrame(rows)
owm_forecast.columns = colnames
owm_forecast

# other fields available from the api

#    ddd.append(w.get_weather_icon_name())
#    ddd.append(w.get_temperature(unit='fahrenheit')['max'])
#    ddd.append(w.get_temperature(unit='fahrenheit')['min'])
#    ddd.append(w.get_wind()['speed'])
#    ddd.append(day['avewind']['degrees'])
#    ddd.append(day['avewind']['dir'])
#    ddd.append(day['maxwind']['mph'])
#    ddd.append(day['maxwind']['degrees'])
#    ddd.append(day['maxwind']['dir'])
#    ddd.append(day['maxhumidity'])
#    ddd.append(day['minhumidity'])
#    if w.get_rain() != {}:
#        ddd.append(w.get_rain()['all'])
#    else:
#        ddd.append(0.0)
#    ddd.append(day['qpf_day']['in'])
#    ddd.append(day['qpf_night']['in'])
#    ddd.append(w.get_visibility_distance())

#df['high_temp'] = df['high_temp'].astype(int)
#df['low_temp'] = df['low_temp'].astype(int)    

Unnamed: 0,date,sl_pressure_mean,cloud_cover,owm_status
0,2016-02-16 20:00:00+00,29.856306,92,Clouds
1,2016-02-17 20:00:00+00,29.795475,92,Rain
2,2016-02-18 20:00:00+00,30.132117,64,Rain
3,2016-02-19 20:00:00+00,30.295418,20,Rain
4,2016-02-20 20:00:00+00,30.473484,0,Clear
5,2016-02-21 20:00:00+00,30.205056,1,Clear
6,2016-02-22 20:00:00+00,30.164304,14,Clear
7,2016-02-23 20:00:00+00,30.03644,59,Clear
8,2016-02-24 20:00:00+00,30.075419,11,Clear
9,2016-02-25 20:00:00+00,30.179069,7,Clear


### Combine both forecasts

In [14]:
forecast = wu_forecast.copy()
forecast.insert(9, 'sl_pressure_mean', owm_forecast['sl_pressure_mean'])
forecast.insert(13, 'cloud_cover', owm_forecast['cloud_cover'])
forecast['owm_status'] = owm_forecast['owm_status']
forecast



Unnamed: 0,date,dayofweek,day,month,year,temp_max,temp_mean,temp_min,temp_delta,sl_pressure_mean,humidity_mean,wind_speed_max,wind_speed_mean,cloud_cover,percipitation,events,wind_direction,owm_status
0,2016-2-17,Wednesday,17,2,2016,65,58,52,13,29.856306,69,30,24,92,0.51,Rain,188,Clouds
1,2016-2-18,Thursday,18,2,2016,59,55,52,7,29.795475,67,20,15,92,0.01,Chance of a Thunderstorm,226,Rain
2,2016-2-19,Friday,19,2,2016,60,55,50,10,30.132117,73,20,13,64,0.06,Chance of Rain,197,Rain
3,2016-2-20,Saturday,20,2,2016,62,56,51,11,30.295418,61,10,6,20,0.0,Partly Cloudy,336,Rain
4,2016-2-21,Sunday,21,2,2016,67,59,52,15,30.473484,62,10,7,0,0.0,Clear,269,Clear
5,2016-2-22,Monday,22,2,2016,71,62,53,18,30.205056,56,10,7,1,0.0,Partly Cloudy,32,Clear
6,2016-2-23,Tuesday,23,2,2016,70,62,55,15,30.164304,62,10,9,14,0.02,Partly Cloudy,167,Clear
7,2016-2-24,Wednesday,24,2,2016,67,60,53,14,30.03644,67,10,8,59,0.02,Chance of Rain,260,Clear
8,2016-2-25,Thursday,25,2,2016,70,62,54,16,30.075419,65,10,6,11,0.0,Partly Cloudy,305,Clear
9,2016-2-26,Friday,26,2,2016,68,61,54,14,30.179069,65,10,7,7,0.0,Clear,228,Clear


In [15]:
forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Data columns (total 18 columns):
date                10 non-null object
dayofweek           10 non-null object
day                 10 non-null int64
month               10 non-null int64
year                10 non-null int64
temp_max            10 non-null int64
temp_mean           10 non-null int64
temp_min            10 non-null int64
temp_delta          10 non-null int64
sl_pressure_mean    10 non-null float64
humidity_mean       10 non-null int64
wind_speed_max      10 non-null int64
wind_speed_mean     10 non-null int64
cloud_cover         10 non-null int64
percipitation       10 non-null float64
events              10 non-null object
wind_direction      10 non-null int64
owm_status          10 non-null object
dtypes: float64(2), int64(12), object(4)
memory usage: 1.5+ KB


## Add in Features
Still to do
- Sun level
- Temp mean
- deltas
- wind chill
- real feel
- etc

### Sun time and Moon phase

In [16]:
forecast['sun_length'], forecast['moon_phase'] = zip(*forecast['date'].apply(sf_sun_moon))

### Sun Units

In [17]:
forecast['sun_units'] = forecast['sun_length'] * (1.0 - (forecast['cloud_cover']  * .01))

In [18]:
forecast.head()

Unnamed: 0,date,dayofweek,day,month,year,temp_max,temp_mean,temp_min,temp_delta,sl_pressure_mean,...,wind_speed_max,wind_speed_mean,cloud_cover,percipitation,events,wind_direction,owm_status,sun_length,moon_phase,sun_units
0,2016-2-17,Wednesday,17,2,2016,65,58,52,13,29.856306,...,30,24,92,0.51,Rain,188,Clouds,654.516667,0.297417,52.361333
1,2016-2-18,Thursday,18,2,2016,59,55,52,7,29.795475,...,20,15,92,0.01,Chance of a Thunderstorm,226,Rain,656.766667,0.331351,52.541333
2,2016-2-19,Friday,19,2,2016,60,55,50,10,30.132117,...,20,13,64,0.06,Chance of Rain,197,Rain,659.05,0.365285,237.258
3,2016-2-20,Saturday,20,2,2016,62,56,51,11,30.295418,...,10,6,20,0.0,Partly Cloudy,336,Rain,661.316667,0.399219,529.053333
4,2016-2-21,Sunday,21,2,2016,67,59,52,15,30.473484,...,10,7,0,0.0,Clear,269,Clear,663.6,0.433153,663.6


In [19]:
forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Data columns (total 21 columns):
date                10 non-null object
dayofweek           10 non-null object
day                 10 non-null int64
month               10 non-null int64
year                10 non-null int64
temp_max            10 non-null int64
temp_mean           10 non-null int64
temp_min            10 non-null int64
temp_delta          10 non-null int64
sl_pressure_mean    10 non-null float64
humidity_mean       10 non-null int64
wind_speed_max      10 non-null int64
wind_speed_mean     10 non-null int64
cloud_cover         10 non-null int64
percipitation       10 non-null float64
events              10 non-null object
wind_direction      10 non-null int64
owm_status          10 non-null object
sun_length          10 non-null float64
moon_phase          10 non-null float64
sun_units           10 non-null float64
dtypes: float64(5), int64(12), object(4)
memory usage: 1.7+ KB


## Write forecast data to file

In [20]:
forecast.to_csv('forecast_10day.csv')