In [29]:
import os

import pandas as pd
import psycopg2
import psycopg2.extras

In [30]:
# Load the hourly NYC weather observations from the CSV next to this notebook
NYC_wx = pd.read_csv(filepath_or_buffer='./NYC.csv')

In [31]:
# Parse the 'valid' column into datetimes, then derive the calendar date,
# the time of day and the hour from it.
# The hour is needed later to join against the citibike data.
valid_ts = pd.to_datetime(NYC_wx['valid'])
NYC_wx['valid'] = valid_ts
for part in ('date', 'time', 'hour'):
    NYC_wx[part] = getattr(valid_ts.dt, part)

In [32]:
# Columns whose null values are filled from the last non-null value before the
# gap and the next non-null value after it.
cols = ['dwpf', 'tmpf', 'relh', 'vsby', 'feel']

# The loop variable is deliberately not called `c`: that name is used for the
# database cursor further down, and re-running this cell would clobber it.
for col in cols:
    # Remember which rows were missing so observed values are never modified
    was_null = NYC_wx[col].isnull()

    # Linear interpolation by row position: a gap of m nulls is split into m + 1
    # equal steps, so the filled values lie strictly between the two surrounding
    # observations (the last filled value no longer duplicates the next reading).
    # limit_area='inside' only fills gaps that have an observation on both
    # sides; nulls at the very start or end of the data are left as null
    # because there is no value pair to interpolate between.
    interpolated = NYC_wx[col].interpolate(method='linear', limit_area='inside')

    # Only write back the rows that were null, rounded to one decimal place
    NYC_wx.loc[was_null, col] = interpolated[was_null].round(1)

In [33]:
# Carry the last seen value forward for wind direction (drct), wind speed in
# knots (sknt) and the first sky coverage column (skyc1)
ffill_cols = ['drct', 'sknt', 'skyc1']
NYC_wx[ffill_cols] = NYC_wx[ffill_cols].ffill()

In [34]:
# Fill the null values of the sea level pressure column (mslp) in two passes.
mslp_null = NYC_wx['mslp'].isnull()
alti_known = NYC_wx['alti'].notnull()

# Pass 1 - the altimeter value (alti) is available for the row:
# use the average of the distinct mslp values observed at exactly that same
# altimeter value, rounded to one decimal place. The altimeter value is matched
# as-is (not rounded first), so a reading is only compared with rows that have
# the same altimeter value.
observed_pairs = NYC_wx.loc[~mslp_null & alti_known, ['alti', 'mslp']].drop_duplicates()
mslp_by_alti = observed_pairs.groupby('alti')['mslp'].mean().round(1)
mslp_from_alti = NYC_wx['alti'].map(mslp_by_alti)
fill_from_alti = mslp_null & alti_known
NYC_wx.loc[fill_from_alti, 'mslp'] = mslp_from_alti[fill_from_alti]

# Pass 2 - whatever is still null (alti is null as well, or no other row shares
# that altimeter value): interpolate linearly by row position between the last
# non-null and the next non-null mslp value, rounded to one decimal place.
# limit_area='inside' leaves nulls at the very start or end of the data alone,
# because they do not have a non-null value on both sides.
mslp_still_null = NYC_wx['mslp'].isnull()
mslp_interpolated = NYC_wx['mslp'].interpolate(method='linear', limit_area='inside')
NYC_wx.loc[mslp_still_null, 'mslp'] = mslp_interpolated[mslp_still_null].round(1)
    

In [35]:
# Fill null altimeter (alti) values from the sea level pressure (mslp) of the
# same row: average the distinct alti values of all rows that share that mslp.
# The mslp column was filled in the previous step, so the case where both
# columns are null is not handled here.
# NOTE(review): the average is rounded to 1 decimal place although the alti
# values shown in the outputs below carry 2 decimals - confirm this is intended.
alti_null = NYC_wx.index[NYC_wx['alti'].isnull()].tolist()
for index in alti_null:
    mslp = round(NYC_wx.at[index, 'mslp'], 1)
    same_pressure = NYC_wx.loc[NYC_wx['mslp'] == mslp, ['alti', 'mslp']]
    average = round(same_pressure.drop_duplicates()['alti'].mean(), 1)
    NYC_wx.loc[index, 'alti'] = average

In [36]:
# Fill wxcodes that are null with the string "None".
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on NYC_wx['wxcodes']: the in-place call acts on a
# temporary Series and does not update NYC_wx when pandas copy-on-write is on.
NYC_wx['wxcodes'] = NYC_wx['wxcodes'].fillna(value='None')

In [37]:
# Fill the ice accretion and snow depth columns with 0 where they are null.
# Each column is re-assigned rather than filled with inplace=True, because the
# in-place call on NYC_wx[col] acts on a temporary Series and does not update
# NYC_wx when pandas copy-on-write is on.
for zero_col in ('ice_accretion_1hr', 'ice_accretion_3hr', 'ice_accretion_6hr', 'snowdepth'):
    NYC_wx[zero_col] = NYC_wx[zero_col].fillna(value=0)

In [38]:
# Add two per-day columns: the daily maximum wind (knots) and the time of the
# row it was taken from. For each day the source column is chosen in this order:
#   1. peak_wind_gust, if it has any value that day
#   2. gust, if it has any value that day
#   3. sknt (wind speed in knots) otherwise

# One pass per unique date
dates = list(NYC_wx['date'].drop_duplicates())
for date in dates:
    same_day = NYC_wx['date'] == date
    hold = NYC_wx[same_day]

    # Pick the first preferred column that is not entirely null for this day
    source = 'sknt'
    for candidate in ('peak_wind_gust', 'gust'):
        if pd.notnull(hold[candidate].max()):
            source = candidate
            break

    # Row holding the maximum of the chosen column; its time goes along with it
    peak_row = hold.loc[hold[source].idxmax()]
    max_windspeed = peak_row[source]
    max_windspeed_time = peak_row['time']

    NYC_wx.loc[same_day, 'max_windspeed_knots'] = max_windspeed
    NYC_wx.loc[same_day, 'max_windspeed_time'] = max_windspeed_time

In [39]:
# Wind speeds in mph: the knots columns multiplied by 1.15078
NYC_wx['smph'] = NYC_wx['sknt'].mul(1.15078)
NYC_wx['max_windspeed_mph'] = NYC_wx['max_windspeed_knots'].mul(1.15078)

In [40]:
# Strip surrounding spaces from the four sky cover columns. A column that is
# entirely null is skipped, so the string accessor is only used on columns
# that hold at least one value.
for sky_col in ('skyc1', 'skyc2', 'skyc3', 'skyc4'):
    if not NYC_wx[sky_col].isnull().all():
        NYC_wx[sky_col] = NYC_wx[sky_col].str.strip()

# Replace sky cover of VV with FG (VV means obstructed vertical visibility due
# to fog). The replacement is applied to the whole frame, not only skyc1-4.
NYC_wx = NYC_wx.replace(to_replace="VV", value="FG")

In [41]:
# Hierarchy of sky coverage (https://www.xweather.com/docs/weather-api/reference/weather-codes):
    # 1 - Clear
    # 2 - Fog
    # 3 - Few
    # 4 - Scattered
    # 5 - Broken
    # 6 - Overcast

# Build one "sky_coverage" column per row holding whichever of skyc1-4 ranks
# "highest" in the hierarchy above.

# Rank / coverage code pairs
data = [[1, 'CLR'], [2, 'FG'], [3, 'FEW'], [4, 'SCT'], [5, 'BKN'], [6, 'OVC']]

# Lookup table as a DataFrame
sky_codes = pd.DataFrame(data, columns=['Rank', 'Coverage'])

# Lookups in both directions; every code has exactly one rank, so turning the
# highest rank back into its code gives the same value as picking the cell
# that holds the highest-ranked code.
rank_by_code = dict(zip(sky_codes['Coverage'], sky_codes['Rank']))
code_by_rank = dict(zip(sky_codes['Rank'], sky_codes['Coverage']))

# Rank all four layers at once (nulls and unknown codes become NaN) instead of
# building and merging a small DataFrame for every single row.
sky_ranks = NYC_wx[['skyc1', 'skyc2', 'skyc3', 'skyc4']].apply(
    lambda layer: layer.map(rank_by_code).astype(float)
)

# Row-wise maximum ignores NaN; a row where no layer has a known code yields
# NaN and therefore a null sky_coverage instead of raising an error.
NYC_wx['sky_coverage'] = sky_ranks.max(axis=1).map(code_by_rank)

In [42]:
# Split the weather codes on the first space into two columns.
# n=1 caps the split at two parts, so a row with three or more codes keeps
# everything after the first space in wxcode2 instead of making the
# two-column assignment below fail on an unexpected number of columns.
wx_parts = NYC_wx['wxcodes'].str.split(' ', n=1, expand=True)

# When no row contains a space the split yields a single column; add an empty
# (None) second column so wxcode2 always exists.
if 1 not in wx_parts.columns:
    wx_parts[1] = None

NYC_wx[['wxcode1', 'wxcode2']] = wx_parts[[0, 1]]

In [73]:
# Build the frame that goes into the psql "general" table: select the source
# columns and give the measurement columns their database names.
general = NYC_wx[['valid', 'date', 'time', 'hour', 'tmpf', 'dwpf', 'relh', 'feel', 'vsby']].rename(
    columns={
        'tmpf': 'temperature_f',
        'dwpf': 'dewpoint_f',
        'relh': 'relative_humidity',
        'feel': 'real_feel_f',
        'vsby': 'visibility',
    }
)
general.head()

Unnamed: 0,valid,date,time,hour,temperature_f,dewpoint_f,relative_humidity,real_feel_f,visibility
0,2023-06-30 00:51:00,2023-06-30,00:51:00,0,74.0,63.0,68.76,73.9,5.0
1,2023-06-30 01:51:00,2023-06-30,01:51:00,1,74.0,64.0,71.21,73.9,5.0
2,2023-06-30 02:51:00,2023-06-30,02:51:00,2,73.0,64.0,73.4,73.0,5.0
3,2023-06-30 03:51:00,2023-06-30,03:51:00,3,72.0,65.0,78.35,72.0,4.0
4,2023-06-30 04:51:00,2023-06-30,04:51:00,4,71.0,65.0,80.78,71.1,4.0


In [74]:
# Build the frame that goes into the psql "wind" table: select the source
# columns and give the wind columns their database names.
wind = NYC_wx[['valid', 'date', 'time', 'hour', 'drct', 'smph', 'sknt', 'max_windspeed_mph',
               'max_windspeed_knots', 'max_windspeed_time']].rename(
    columns={
        'drct': 'direction',
        'smph': 'windspeed_mph',
        'sknt': 'windspeed_knots',
    }
)
wind.head()

Unnamed: 0,valid,date,time,hour,direction,windspeed_mph,windspeed_knots,max_windspeed_mph,max_windspeed_knots,max_windspeed_time
0,2023-06-30 00:51:00,2023-06-30,00:51:00,0,0.0,0.0,0.0,6.90468,6.0,16:29:00
1,2023-06-30 01:51:00,2023-06-30,01:51:00,1,0.0,4.60312,4.0,6.90468,6.0,16:29:00
2,2023-06-30 02:51:00,2023-06-30,02:51:00,2,0.0,4.60312,4.0,6.90468,6.0,16:29:00
3,2023-06-30 03:51:00,2023-06-30,03:51:00,3,0.0,0.0,0.0,6.90468,6.0,16:29:00
4,2023-06-30 04:51:00,2023-06-30,04:51:00,4,0.0,0.0,0.0,6.90468,6.0,16:29:00


In [75]:
# Build the frame that goes into the psql "precip" table: select the source
# columns and rename p01i to its database name.
precip = NYC_wx[['valid', 'date', 'time', 'hour', 'p01i', 'wxcode1', 'wxcode2', 'ice_accretion_1hr',
                 'ice_accretion_3hr', 'ice_accretion_6hr', 'snowdepth']].rename(
    columns={'p01i': 'one_hour_precip_amount'}
)
precip.head()

Unnamed: 0,valid,date,time,hour,one_hour_precip_amount,wxcode1,wxcode2,ice_accretion_1hr,ice_accretion_3hr,ice_accretion_6hr,snowdepth
0,2023-06-30 00:51:00,2023-06-30,00:51:00,0,0.0,HZ,,0,0.0,0,0.0
1,2023-06-30 01:51:00,2023-06-30,01:51:00,1,0.0,HZ,,0,0.0,0,0.0
2,2023-06-30 02:51:00,2023-06-30,02:51:00,2,0.0,HZ,,0,0.0,0,0.0
3,2023-06-30 03:51:00,2023-06-30,03:51:00,3,0.0,HZ,,0,0.0,0,0.0
4,2023-06-30 04:51:00,2023-06-30,04:51:00,4,0.0,HZ,,0,0.0,0,0.0


In [76]:
# Build the frame that goes into the psql "pressure" table: select the source
# columns and give the two pressure columns their database names.
pressure = NYC_wx[['valid', 'date', 'time', 'hour', 'alti', 'mslp']].rename(
    columns={'alti': 'altimeter', 'mslp': 'sea_level_pressure'}
)
pressure.head()

Unnamed: 0,valid,date,time,hour,altimeter,sea_level_pressure
0,2023-06-30 00:51:00,2023-06-30,00:51:00,0,30.04,1016.4
1,2023-06-30 01:51:00,2023-06-30,01:51:00,1,30.06,1017.0
2,2023-06-30 02:51:00,2023-06-30,02:51:00,2,30.06,1017.1
3,2023-06-30 03:51:00,2023-06-30,03:51:00,3,30.06,1016.8
4,2023-06-30 04:51:00,2023-06-30,04:51:00,4,30.06,1016.9


In [77]:
# Build the frame that goes into the psql "wxcode" table: read the weather
# code lookup CSV and label its two columns with their database names.
wxcode = pd.read_csv('./WxCodes.csv').set_axis(['wxcode', 'meaning'], axis=1)
wxcode.head()

Unnamed: 0,wxcode,meaning
0,-,Light Moderate
1,+,Heavy
2,VC,In the Vicinity
3,MI,Shallow
4,PR,Partial


In [78]:
# Build the frame that goes into the psql "sky_coverage" table
sky_coverage_cols = ['valid', 'date', 'time', 'hour', 'sky_coverage']
sky_coverage = NYC_wx[sky_coverage_cols]
sky_coverage.head()

Unnamed: 0,valid,date,time,hour,sky_coverage
0,2023-06-30 00:51:00,2023-06-30,00:51:00,0,CLR
1,2023-06-30 01:51:00,2023-06-30,01:51:00,1,CLR
2,2023-06-30 02:51:00,2023-06-30,02:51:00,2,CLR
3,2023-06-30 03:51:00,2023-06-30,03:51:00,3,CLR
4,2023-06-30 04:51:00,2023-06-30,04:51:00,4,CLR


In [116]:
# Connect to the psql database using psycopg2.
# Every connection setting can be overridden through an environment variable
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials do
# not have to be written into the notebook. The fallbacks are the local
# development values this notebook has always used.
conn = psycopg2.connect(dbname=os.environ.get('PGDATABASE', 'new_db'),
                        user=os.environ.get('PGUSER', 'awesome_user'),
                        password=os.environ.get('PGPASSWORD', 'awesome_password'),
                        host=os.environ.get('PGHOST', 'localhost'),
                        port=os.environ.get('PGPORT', '5432'))

# Cursor shared by the schema, table and insert cells below
c = conn.cursor()

In [117]:
# Make sure the final_project schema exists, then commit
create_schema_sql = 'CREATE SCHEMA IF NOT EXISTS final_project'
c.execute(create_schema_sql)
conn.commit()

In [101]:
# Turn each table frame into a list of row dictionaries (one dict per row,
# keyed by column name) for easier insertion into PSQL
(general_dict, wind_dict, precip_dict,
 pressure_dict, wxcode_dict, sky_coverage_dict) = [
    frame.to_dict(orient='records')
    for frame in (general, wind, precip, pressure, wxcode, sky_coverage)
]

In [118]:
# (Re)create the six weather tables in the final_project schema.
# Each entry pairs the DROP statement with the CREATE statement of one table;
# they are executed in the listed order, drop first, so every run starts from
# empty tables.
# NOTE(review): hour and the numeric precip columns are declared VARCHAR
# although the frames above show numbers in them - confirm this is intended.
table_statements = [
    ('DROP TABLE IF EXISTS final_project.general', '''
    CREATE TABLE IF NOT EXISTS final_project.general (
        id SERIAL PRIMARY KEY,
        valid timestamp NOT NULL,
        date DATE,
        time TIME,
        hour VARCHAR,
        temperature_f FLOAT,
        dewpoint_f FLOAT,
        relative_humidity FLOAT,
        real_feel_f FLOAT,
        visibility FLOAT
    )'''),
    ('DROP TABLE IF EXISTS final_project.wind', '''
    CREATE TABLE IF NOT EXISTS final_project.wind (
        id SERIAL PRIMARY KEY,
        valid timestamp NOT NULL,
        date DATE,
        time TIME,
        hour VARCHAR,
        direction FLOAT,
        windspeed_mph FLOAT,
        windspeed_knots FLOAT,
        max_windspeed_mph FLOAT,
        max_windspeed_knots FLOAT,
        max_windspeed_time TIME
    )'''),
    ('DROP TABLE IF EXISTS final_project.pressure', '''
    CREATE TABLE IF NOT EXISTS final_project.pressure (
        id SERIAL PRIMARY KEY,
        valid timestamp NOT NULL,
        date DATE,
        time TIME,
        hour VARCHAR,
        altimeter FLOAT,
        sea_level_pressure FLOAT
    )'''),
    ('DROP TABLE IF EXISTS final_project.precip', '''
    CREATE TABLE IF NOT EXISTS final_project.precip (
        id SERIAL PRIMARY KEY,
        valid timestamp NOT NULL,
        date DATE,
        time TIME,
        hour VARCHAR,
        one_hour_precip_amount VARCHAR,
        wxcode1 VARCHAR,
        wxcode2 VARCHAR,
        ice_accretion_1hr VARCHAR,
        ice_accretion_3hr VARCHAR,
        ice_accretion_6hr VARCHAR,
        snowdepth VARCHAR
    )'''),
    ('DROP TABLE IF EXISTS final_project.wxcode', '''
    CREATE TABLE IF NOT EXISTS final_project.wxcode (
        id SERIAL PRIMARY KEY,
        wxcode VARCHAR,
        meaning VARCHAR
    )'''),
    ('DROP TABLE IF EXISTS final_project.sky_coverage', '''
    CREATE TABLE IF NOT EXISTS final_project.sky_coverage (
        id SERIAL PRIMARY KEY,
        valid timestamp NOT NULL,
        date DATE,
        time TIME,
        hour VARCHAR,
        sky_coverage VARCHAR
    )'''),
]

for drop_sql, create_sql in table_statements:
    c.execute(drop_sql)
    c.execute(create_sql)

In [119]:
# Bulk-insert the general records into final_project.general and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = general_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.general ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in general_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [120]:
# Read back up to 100 rows of final_project.general to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.general LIMIT 100", con=conn)

Unnamed: 0,id,valid,date,time,hour,temperature_f,dewpoint_f,relative_humidity,real_feel_f,visibility
0,1,2023-06-30 00:51:00,2023-06-30,00:51:00,0,74.0,63.0,68.76,73.9,5.00
1,2,2023-06-30 01:51:00,2023-06-30,01:51:00,1,74.0,64.0,71.21,73.9,5.00
2,3,2023-06-30 02:51:00,2023-06-30,02:51:00,2,73.0,64.0,73.40,73.0,5.00
3,4,2023-06-30 03:51:00,2023-06-30,03:51:00,3,72.0,65.0,78.35,72.0,4.00
4,5,2023-06-30 04:51:00,2023-06-30,04:51:00,4,71.0,65.0,80.78,71.1,4.00
...,...,...,...,...,...,...,...,...,...,...
95,96,2023-07-03 02:10:00,2023-07-03,02:10:00,2,75.0,73.0,93.51,75.0,1.00
96,97,2023-07-03 02:21:00,2023-07-03,02:21:00,2,75.0,73.0,93.51,75.0,2.00
97,98,2023-07-03 02:44:00,2023-07-03,02:44:00,2,75.0,73.0,93.51,75.0,1.25
98,99,2023-07-03 02:51:00,2023-07-03,02:51:00,2,75.0,73.0,93.51,75.0,1.50


In [121]:
# Bulk-insert the wind records into final_project.wind and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = wind_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.wind ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in wind_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [122]:
# Read back up to 100 rows of final_project.wind to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.wind LIMIT 100", con=conn)

Unnamed: 0,id,valid,date,time,hour,direction,windspeed_mph,windspeed_knots,max_windspeed_mph,max_windspeed_knots,max_windspeed_time
0,1,2023-06-30 00:51:00,2023-06-30,00:51:00,0,0.0,0.00000,0.0,6.90468,6.0,16:29:00
1,2,2023-06-30 01:51:00,2023-06-30,01:51:00,1,0.0,4.60312,4.0,6.90468,6.0,16:29:00
2,3,2023-06-30 02:51:00,2023-06-30,02:51:00,2,0.0,4.60312,4.0,6.90468,6.0,16:29:00
3,4,2023-06-30 03:51:00,2023-06-30,03:51:00,3,0.0,0.00000,0.0,6.90468,6.0,16:29:00
4,5,2023-06-30 04:51:00,2023-06-30,04:51:00,4,0.0,0.00000,0.0,6.90468,6.0,16:29:00
...,...,...,...,...,...,...,...,...,...,...,...
95,96,2023-07-03 02:10:00,2023-07-03,02:10:00,2,210.0,3.45234,3.0,19.56326,17.0,04:12:00
96,97,2023-07-03 02:21:00,2023-07-03,02:21:00,2,0.0,0.00000,0.0,19.56326,17.0,04:12:00
97,98,2023-07-03 02:44:00,2023-07-03,02:44:00,2,200.0,8.05546,7.0,19.56326,17.0,04:12:00
98,99,2023-07-03 02:51:00,2023-07-03,02:51:00,2,200.0,5.75390,5.0,19.56326,17.0,04:12:00


In [123]:
# Bulk-insert the pressure records into final_project.pressure and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = pressure_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.pressure ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in pressure_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [124]:
# Read back up to 100 rows of final_project.pressure to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.pressure LIMIT 100", con=conn)

Unnamed: 0,id,valid,date,time,hour,altimeter,sea_level_pressure
0,1,2023-06-30 00:51:00,2023-06-30,00:51:00,0,30.04,1016.4
1,2,2023-06-30 01:51:00,2023-06-30,01:51:00,1,30.06,1017.0
2,3,2023-06-30 02:51:00,2023-06-30,02:51:00,2,30.06,1017.1
3,4,2023-06-30 03:51:00,2023-06-30,03:51:00,3,30.06,1016.8
4,5,2023-06-30 04:51:00,2023-06-30,04:51:00,4,30.06,1016.9
...,...,...,...,...,...,...,...
95,96,2023-07-03 02:10:00,2023-07-03,02:10:00,2,29.83,1008.2
96,97,2023-07-03 02:21:00,2023-07-03,02:21:00,2,29.82,1008.2
97,98,2023-07-03 02:44:00,2023-07-03,02:44:00,2,29.84,1008.2
98,99,2023-07-03 02:51:00,2023-07-03,02:51:00,2,29.84,1009.3


In [125]:
# Bulk-insert the precip records into final_project.precip and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = precip_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.precip ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in precip_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [126]:
# Read back up to 100 rows of final_project.precip to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.precip LIMIT 100", con=conn)

Unnamed: 0,id,valid,date,time,hour,one_hour_precip_amount,wxcode1,wxcode2,ice_accretion_1hr,ice_accretion_3hr,ice_accretion_6hr,snowdepth
0,1,2023-06-30 00:51:00,2023-06-30,00:51:00,0,0.00,HZ,,0,0.0,0,0.0
1,2,2023-06-30 01:51:00,2023-06-30,01:51:00,1,0.00,HZ,,0,0.0,0,0.0
2,3,2023-06-30 02:51:00,2023-06-30,02:51:00,2,0.00,HZ,,0,0.0,0,0.0
3,4,2023-06-30 03:51:00,2023-06-30,03:51:00,3,0.00,HZ,,0,0.0,0,0.0
4,5,2023-06-30 04:51:00,2023-06-30,04:51:00,4,0.00,HZ,,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,2023-07-03 02:10:00,2023-07-03,02:10:00,2,0.08,-RA,BR,0,0.0,0,0.0
96,97,2023-07-03 02:21:00,2023-07-03,02:21:00,2,0.09,BR,,0,0.0,0,0.0
97,98,2023-07-03 02:44:00,2023-07-03,02:44:00,2,0.12,-RA,BR,0,0.0,0,0.0
98,99,2023-07-03 02:51:00,2023-07-03,02:51:00,2,0.13,RA,BR,0,0.0,0,0.0


In [127]:
# Bulk-insert the wxcode records into final_project.wxcode and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = wxcode_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.wxcode ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in wxcode_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [128]:
# Read back up to 100 rows of final_project.wxcode to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.wxcode LIMIT 100", con=conn)

Unnamed: 0,id,wxcode,meaning
0,1,-,Light Moderate
1,2,+,Heavy
2,3,VC,In the Vicinity
3,4,MI,Shallow
4,5,PR,Partial
5,6,BC,Patches
6,7,DR,Low Drifting
7,8,BL,Blowing
8,9,SH,Showers
9,10,TS,Thunderstorm


In [129]:
# Bulk-insert the sky_coverage records into final_project.sky_coverage and commit.
# The column list of the INSERT comes from the keys of the first record.
columns = sky_coverage_dict[0].keys()
column_list = ','.join(columns)
query = 'INSERT INTO final_project.sky_coverage ({}) VALUES %s'.format(column_list)

# One list of values per row, in the same order as the column list
values = [list(record.values()) for record in sky_coverage_dict]

psycopg2.extras.execute_values(c, query, values)
conn.commit()

In [131]:
# Read back up to 100 rows of final_project.sky_coverage to confirm the upload
pd.read_sql_query(sql="SELECT * FROM final_project.sky_coverage LIMIT 100", con=conn)

Unnamed: 0,id,valid,date,time,hour,sky_coverage
0,1,2023-06-30 00:51:00,2023-06-30,00:51:00,0,CLR
1,2,2023-06-30 01:51:00,2023-06-30,01:51:00,1,CLR
2,3,2023-06-30 02:51:00,2023-06-30,02:51:00,2,CLR
3,4,2023-06-30 03:51:00,2023-06-30,03:51:00,3,CLR
4,5,2023-06-30 04:51:00,2023-06-30,04:51:00,4,CLR
...,...,...,...,...,...,...
95,96,2023-07-03 02:10:00,2023-07-03,02:10:00,2,BKN
96,97,2023-07-03 02:21:00,2023-07-03,02:21:00,2,OVC
97,98,2023-07-03 02:44:00,2023-07-03,02:44:00,2,OVC
98,99,2023-07-03 02:51:00,2023-07-03,02:51:00,2,OVC


In [132]:
# Close the database connection `conn` created by psycopg2.connect earlier in
# the notebook. Every insert cell above already called conn.commit(), so none
# of the inserts is left uncommitted at this point.
conn.close()