### Recovering data from SQL DB

In [92]:
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
from math import cos, radians

# Let pandas render up to 999 columns so wide frames are not truncated on display.
pd.set_option('display.max_columns', 999)
%matplotlib inline

In [2]:
# Open a psycopg2 connection to the `soaring_predictor` database. The DSN string
# names only the database; no host, user or password is given here.
conn = psycopg2.connect("dbname = soaring_predictor")

### Loading flight metrics

In [3]:
# Read every row of the `flight_metrics` table into a DataFrame through `conn`.
F = pd.read_sql(sql='SELECT * from flight_metrics;', con=conn)

In [21]:
# List the columns that came back from the flight_metrics query.
F.columns

Index(['flight', 'date', 'pilot', 'launch_time', 'landing_time', 'flying_time',
       'launch_coord', 'landing_coord', 'landed_lz', 'linear_dist', 'max_alt',
       'total_alt_gain', 'left_perimiter', 'month', 'year', 'alt_sampe',
       'len_sampe', 'doy', 'timestamp'],
      dtype='object')

In [114]:
# Collapse the flights of each date into one row holding the per-date MEAN of
# flying time, max altitude and the left-perimeter flag. The result is indexed by date.
# NOTE(review): this cell used to be described as "isolating the best flight of the
# date (by max flying time)", but the code averages all flights of the day - confirm
# which of the two is intended.
# The columns are selected with a list (double brackets): the tuple-style form
# groupby(...)['a', 'b', 'c'] is deprecated and raises on current pandas versions.

Y = F.groupby('date')[['flying_time', 'max_alt', 'left_perimiter']].mean()

### Loading weather data

In [110]:
# Read every row of the `weather` table into a DataFrame through `conn`.
W = pd.read_sql(sql='SELECT * from weather;', con=conn)

In [78]:
# Preview the first rows of the raw weather table.
W.head()

Unnamed: 0,date,alti_0,alti_4,alti_8,alti_12,alti_16,alti_20,drct_0,drct_4,drct_8,drct_12,drct_16,drct_20,dwpf_0,dwpf_4,dwpf_8,dwpf_12,dwpf_16,dwpf_20,p01i_0,p01i_4,p01i_8,p01i_12,p01i_16,p01i_20,relh_0,relh_4,relh_8,relh_12,relh_16,relh_20,sknt_0,sknt_4,sknt_8,sknt_12,sknt_16,sknt_20,skyc1_0,skyc1_4,skyc1_8,skyc1_12,skyc1_16,skyc1_20,tmpf_0,tmpf_4,tmpf_8,tmpf_12,tmpf_16,tmpf_20,doy
0,2013-01-02,30.334,30.2975,30.3075,30.255,30.24,30.205,0.0,0.0,72.5,150.0,87.5,55.0,27.896,27.005,27.95,22.505,20.21,20.255,0.0,0.0,0.0,0.0,0.0,0.0,93.774,93.23,78.7425,40.0875,45.64,56.53,0.0,0.0,2.0,5.25,3.25,2.75,0.15,0.0,0.0,0.0,0.0,0.0,29.48,28.715,34.52,45.5,39.47,34.475,2
1,2013-01-03,30.195,30.1475,30.14,30.115,30.125,30.1525,70.0,35.0,120.0,145.0,92.5,30.0,21.515,24.305,20.525,19.49,25.205,33.53,0.0,0.0,0.0,0.0,0.0125,0.015,65.9625,88.6125,47.9025,36.1925,58.615,86.2575,2.0,1.0,4.0,7.0,4.75,1.0,0.0,0.0,0.0,0.0625,0.75,1.0,32.225,27.23,38.75,44.735,39.515,37.265,3
2,2013-01-04,30.208333,30.258,30.312857,30.27,30.2125,30.18,93.333333,138.0,110.0,62.0,0.0,77.5,35.69,37.634,41.154286,43.736,41.27,38.48,0.01,0.003,0.0,0.0,0.0,0.0,93.811667,93.334,91.638571,83.808,92.7875,88.0025,2.833333,4.8,3.571429,2.4,0.0,0.75,0.833333,0.725,0.571429,0.55,0.1875,0.875,37.31,39.398,43.442857,48.416,43.25,41.81,4
3,2013-01-05,30.1675,30.17,30.1925,30.145,30.125,30.148889,87.5,157.5,92.5,150.0,142.5,72.222222,38.48,34.97,33.215,34.745,32.495,37.86,0.0,0.0,0.0,0.0,0.0,0.017778,91.625,76.0925,65.5925,69.7775,66.185,91.553333,2.75,3.75,3.5,5.25,4.0,2.666667,0.875,0.875,0.9375,0.75,0.5,0.666667,40.73,42.26,44.06,44.015,43.025,40.2,5
4,2013-01-06,30.166,30.162,30.178333,30.125,30.0825,30.0225,117.0,56.0,88.333333,27.5,90.0,57.5,37.256,37.832,39.38,40.55,40.235,39.245,0.0,0.0,0.0,0.0,0.0025,0.0075,94.687,92.686,90.475,83.8625,85.7975,91.655,5.3,1.4,2.333333,0.75,3.0,3.75,0.775,0.75,0.75,0.8125,0.5,0.6875,38.66,39.776,42.02,45.14,44.24,41.495,6


In [104]:
def get_cos(direction):
    """Return the absolute value of the cosine of an angle given in degrees.

    Taking the absolute value means opposite angles map to the same result
    (for example 0 and 180 both give 1.0).
    """
    angle_rad = radians(direction)
    cosine = cos(angle_rad)
    return abs(cosine)

In [105]:
# The six `drct_*` columns of the weather table.
direction = ['drct_0', 'drct_4', 'drct_8', 'drct_12', 'drct_16', 'drct_20']

# Replace every value in those columns by get_cos(value).
# NOTE(review): W is overwritten in place, so running this cell a second time
# applies get_cos to values that were already transformed.
W[direction] = W[direction].apply(lambda column: column.apply(get_cos))

In [106]:
# Preview the weather table again to check the transformed drct_* columns.
W.head()

Unnamed: 0,date,alti_0,alti_4,alti_8,alti_12,alti_16,alti_20,drct_0,drct_4,drct_8,drct_12,drct_16,drct_20,dwpf_0,dwpf_4,dwpf_8,dwpf_12,dwpf_16,dwpf_20,p01i_0,p01i_4,p01i_8,p01i_12,p01i_16,p01i_20,relh_0,relh_4,relh_8,relh_12,relh_16,relh_20,sknt_0,sknt_4,sknt_8,sknt_12,sknt_16,sknt_20,skyc1_0,skyc1_4,skyc1_8,skyc1_12,skyc1_16,skyc1_20,tmpf_0,tmpf_4,tmpf_8,tmpf_12,tmpf_16,tmpf_20,doy
0,2013-01-02,30.334,30.2975,30.3075,30.255,30.24,30.205,1.0,1.0,0.300706,0.866025,0.04361939,0.573576,27.896,27.005,27.95,22.505,20.21,20.255,0.0,0.0,0.0,0.0,0.0,0.0,93.774,93.23,78.7425,40.0875,45.64,56.53,0.0,0.0,2.0,5.25,3.25,2.75,0.15,0.0,0.0,0.0,0.0,0.0,29.48,28.715,34.52,45.5,39.47,34.475,2
1,2013-01-03,30.195,30.1475,30.14,30.115,30.125,30.1525,0.34202,0.819152,0.5,0.819152,0.04361939,0.866025,21.515,24.305,20.525,19.49,25.205,33.53,0.0,0.0,0.0,0.0,0.0125,0.015,65.9625,88.6125,47.9025,36.1925,58.615,86.2575,2.0,1.0,4.0,7.0,4.75,1.0,0.0,0.0,0.0,0.0625,0.75,1.0,32.225,27.23,38.75,44.735,39.515,37.265,3
2,2013-01-04,30.208333,30.258,30.312857,30.27,30.2125,30.18,0.058145,0.743145,0.34202,0.469472,1.0,0.21644,35.69,37.634,41.154286,43.736,41.27,38.48,0.01,0.003,0.0,0.0,0.0,0.0,93.811667,93.334,91.638571,83.808,92.7875,88.0025,2.833333,4.8,3.571429,2.4,0.0,0.75,0.833333,0.725,0.571429,0.55,0.1875,0.875,37.31,39.398,43.442857,48.416,43.25,41.81,4
3,2013-01-05,30.1675,30.17,30.1925,30.145,30.125,30.148889,0.043619,0.92388,0.043619,0.866025,0.7933533,0.305326,38.48,34.97,33.215,34.745,32.495,37.86,0.0,0.0,0.0,0.0,0.0,0.017778,91.625,76.0925,65.5925,69.7775,66.185,91.553333,2.75,3.75,3.5,5.25,4.0,2.666667,0.875,0.875,0.9375,0.75,0.5,0.666667,40.73,42.26,44.06,44.015,43.025,40.2,5
4,2013-01-06,30.166,30.162,30.178333,30.125,30.0825,30.0225,0.45399,0.559193,0.029085,0.887011,6.123234000000001e-17,0.5373,37.256,37.832,39.38,40.55,40.235,39.245,0.0,0.0,0.0,0.0,0.0025,0.0075,94.687,92.686,90.475,83.8625,85.7975,91.655,5.3,1.4,2.333333,0.75,3.0,3.75,0.775,0.75,0.75,0.8125,0.5,0.6875,38.66,39.776,42.02,45.14,44.24,41.495,6


### Merging the 2 sources of data

In [115]:
# Left-join the per-date flight aggregates (Y, indexed by date) with the weather
# rows, matching Y's index against W's `date` column.
data = Y.merge(W, how='left', left_index=True, right_on='date')

In [116]:
# Keep only the rows without any missing value (e.g. dates with no weather match).
data = data.dropna()

In [27]:
import io

from sqlalchemy import create_engine

# `if_exists` accepts 'fail', 'append' or 'replace'

def write_to_table(df, db_engine, table_name, if_exists='fail'):
    """Create ``table_name`` from ``df`` and bulk-load its rows with COPY.

    The frame is serialised to an in-memory pipe-delimited CSV, the table is
    created through pandas' ``SQLTable`` helper, and the CSV is then streamed to
    the database with ``cursor.copy_expert`` on the engine's raw DBAPI connection.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to store. The index is not written (``index=False``).
    db_engine : SQLAlchemy engine
        Used to create the table and to obtain the raw connection and cursor.
        The cursor must provide ``copy_expert`` (as psycopg2 cursors do).
    table_name : str
        Name of the target table. It is interpolated into the COPY statement
        as-is, so only pass trusted identifiers.
    if_exists : {'fail', 'append', 'replace'}, default 'fail'
        Passed through to ``SQLTable`` and applied by ``table.create()``.
    """
    string_data_io = io.StringIO()
    df.to_csv(string_data_io, sep='|', index=False)
    pd_sql_engine = pd.io.sql.pandasSQL_builder(db_engine)
    table = pd.io.sql.SQLTable(table_name, pd_sql_engine, frame=df,
                               index=False, if_exists=if_exists)
    table.create()
    # Rewind only. The header line stays in the buffer because the HEADER option
    # of COPY already tells PostgreSQL to skip the first line; consuming the
    # header here as well made COPY discard the first data row.
    string_data_io.seek(0)
    with db_engine.connect() as connection:
        with connection.connection.cursor() as cursor:
            copy_cmd = "COPY %s FROM STDIN HEADER DELIMITER '|' CSV" % table_name
            cursor.copy_expert(copy_cmd, string_data_io)
        # COPY ran on the raw DBAPI connection, so commit on that same connection.
        connection.connection.commit()

In [28]:
# SQLAlchemy URL for the local `soaring_predictor` database on port 5432; no user
# or password is embedded in the URL.
address = 'postgresql://@localhost:5432/soaring_predictor'
engine = create_engine(address)

In [29]:
# Store the merged frame as table `data`; if_exists='replace' is passed so an
# existing table of that name is not treated as an error.
write_to_table(data, engine, 'data',if_exists='replace')

In [70]:
# List the columns of the merged flight + weather frame.
data.columns

Index(['flying_time', 'max_alt', 'left_perimiter', 'date', 'alti_0', 'alti_4',
       'alti_8', 'alti_12', 'alti_16', 'alti_20', 'drct_0', 'drct_4', 'drct_8',
       'drct_12', 'drct_16', 'drct_20', 'dwpf_0', 'dwpf_4', 'dwpf_8',
       'dwpf_12', 'dwpf_16', 'dwpf_20', 'p01i_0', 'p01i_4', 'p01i_8',
       'p01i_12', 'p01i_16', 'p01i_20', 'relh_0', 'relh_4', 'relh_8',
       'relh_12', 'relh_16', 'relh_20', 'sknt_0', 'sknt_4', 'sknt_8',
       'sknt_12', 'sknt_16', 'sknt_20', 'skyc1_0', 'skyc1_4', 'skyc1_8',
       'skyc1_12', 'skyc1_16', 'skyc1_20', 'tmpf_0', 'tmpf_4', 'tmpf_8',
       'tmpf_12', 'tmpf_16', 'tmpf_20', 'doy'],
      dtype='object')

In [117]:
# Also save the merged frame as data.csv in the current working directory.
# index=False is not passed, so the frame's index is written as the first column.
data.to_csv('data.csv')

In [72]:
pwd

'/Users/eduardodeangelis/Desktop/galvanize/soaring-predictor/notebooks'

In [33]:
# Per-date mean of the `tmpf_12` column, kept as a one-column DataFrame indexed by date.
T = W.groupby('date')['tmpf_12'].mean().to_frame()

In [45]:
# Make this cell safe on a fresh kernel and safe to re-run.
# `date` must be a regular column of T so that it can be used as a merge key and
# ends up as a column of the merged frame; reset the index only when it is missing.
if 'date' not in T.columns:
    T = T.reset_index()
# `level_0` only appears when reset_index has been run repeatedly; drop it if it is
# there and do nothing otherwise (the unconditional drop raised KeyError).
T = T.drop('level_0', axis=1, errors='ignore')

In [47]:
# Left-join the per-date flight aggregates (Y, indexed by date) with T on T's `date`.
# NOTE(review): this rebinds W, which held the full weather table up to this point.
W = Y.merge(T, how='left', left_index=True, right_on='date')

In [49]:
# Keep only the rows without any missing value.
W = W.dropna()

In [55]:
# returns day of the year

def doy(dt_obj):
    """Return the day of the year of ``dt_obj`` as an int (1 for January 1st).

    ``dt_obj`` must provide ``strftime`` (e.g. ``datetime.date`` / ``datetime.datetime``).
    """
    day_number = dt_obj.strftime('%j')  # zero-padded day of year, e.g. '002'
    return int(day_number)

In [57]:
# Add a `doy` column holding the day of the year of each row's `date`.
W['doy'] = W['date'].map(doy)

In [65]:
# fig,ax = plt.subplots(figsize = (18,8))
# ax.scatter(W['doy'], W['flying_time'], alpha = 0.5,)
# ax.plot(W['doy'],W['tmpf_12'])
# ax.set_title('Flight time [in minutes]',fontsize=20)
# ax.set_ylabel('minutes', fontsize=16)
# ax.set_xticks([0,90,180,270]) # choose which x locations to have ticks
# ax.set_xticklabels(['Jan','Apr',"Jul",'Oct'], fontsize=14)

In [64]:
# Display the merged frame ordered by day of year (the sorted copy is not stored).
W.sort_values('doy')
# W.groupby('doy')['tmpf_12'].mean()

Unnamed: 0,flying_time,max_alt,left_perimiter,date,tmpf_12,doy
366,16.583333,500,0,2014-01-03,45.005000,3
367,16.266667,546,0,2014-01-04,43.295000,4
9,21.366667,493,0,2013-01-11,34.790000,11
10,23.066667,682,0,2013-01-12,36.005000,12
740,7.166667,563,0,2015-01-12,50.990000,12
14,16.616667,478,0,2013-01-16,39.020000,16
748,20.250000,563,0,2015-01-20,48.470000,20
384,8.150000,517,0,2014-01-21,44.735000,21
1482,61.500000,625,1,2017-01-23,45.903269,23
386,15.333333,554,0,2014-01-23,46.715000,23
