In [1]:
# Import our dependencies
import pandas as pd
import os
from sqlalchemy import create_engine
import time
import datetime
from config import password

In [2]:
# Bring in CSVs
weather = 'Resources/Weather_Summary.csv'
wwii_data = 'Resources/THOR_WWII_DATA_CLEAN.csv'
wwii_aircraft_data = 'Resources/THOR_WWII_AIRCRAFT_GLOSS.csv'
wwii_weapons_data = 'Resources/THOR_WWII_WEAPON_GLOSS.csv'

In [3]:
# Read in Weather CSV 
weather_df = pd.read_csv(weather, encoding = "ISO-8859-1", low_memory=False)
weather_df.head()

Unnamed: 0,STA,Date,Precip,WindGustSpd,MaxTemp,MinTemp,MeanTemp,Snowfall,PoorWeather,YR,...,FB,FTI,ITH,PGT,TSHDSBRSGF,SD3,RHX,RHN,RVG,WTE
0,10001,1942-7-1,1.016,,25.555556,22.222222,23.888889,0,,42,...,,,,,,,,,,
1,10001,1942-7-2,0.0,,28.888889,21.666667,25.555556,0,,42,...,,,,,,,,,,
2,10001,1942-7-3,2.54,,26.111111,22.222222,24.444444,0,,42,...,,,,,,,,,,
3,10001,1942-7-4,2.54,,26.666667,22.222222,24.444444,0,,42,...,,,,,,,,,,
4,10001,1942-7-5,0.0,,26.666667,21.666667,24.444444,0,,42,...,,,,,,,,,,


In [18]:
weather_df.columns

Index(['STA', 'Date', 'Precip', 'WindGustSpd', 'MaxTemp', 'MinTemp',
       'MeanTemp', 'Snowfall', 'PoorWeather', 'YR', 'MO', 'DA', 'PRCP', 'DR',
       'SPD', 'MAX', 'MIN', 'MEA', 'SNF', 'SND', 'FT', 'FB', 'FTI', 'ITH',
       'PGT', 'TSHDSBRSGF', 'SD3', 'RHX', 'RHN', 'RVG', 'WTE'],
      dtype='object')

In [19]:
# Organize Weather data to list weather components
# Create a new table with the selected columns
weather_data_clean = weather_df[['Date', 'MaxTemp', 'MinTemp','MAX', 'MIN', 'Precip', 'WindGustSpd', 'Snowfall', 'PoorWeather']].copy()

In [20]:
# Print clean weather data
weather_data_clean.head()

Unnamed: 0,Date,MaxTemp,MinTemp,MAX,MIN,Precip,WindGustSpd,Snowfall,PoorWeather
0,1942-7-1,25.555556,22.222222,78.0,72.0,1.016,,0,
1,1942-7-2,28.888889,21.666667,84.0,71.0,0.0,,0,
2,1942-7-3,26.111111,22.222222,79.0,72.0,2.54,,0,
3,1942-7-4,26.666667,22.222222,80.0,72.0,2.54,,0,
4,1942-7-5,26.666667,21.666667,80.0,71.0,0.0,,0,


In [21]:
# Read in Thor Weather CSV
thor_weather = pd.read_csv(wwii_data, encoding = "ISO-8859-1", low_memory=False)
thor_weather.head()

Unnamed: 0,WWII_ID,MASTER_INDEX_NUMBER,MSNDATE,THEATER,NAF,COUNTRY_FLYING_MISSION,TGT_COUNTRY_CODE,TGT_COUNTRY,TGT_LOCATION,TGT_TYPE,...,CALLSIGN,ROUNDS_AMMO,SPARES_RETURN_AC,WX_FAIL_AC,MECH_FAIL_AC,MISC_FAIL_AC,TARGET_COMMENT,MISSION_COMMENTS,SOURCE,DATABASE_EDIT_COMMENTS
0,1,,8/15/1943,MTO,12 AF,USA,13.0,ITALY,SPADAFORA,,...,,,,,,,,,,
1,4285,20028.0,2/20/1945,PTO,5 AF,USA,,PHILIPPINE ISLANDS,PUERTA PRINCESA,UNIDENTIFIED TARGET,...,,,,,,,,,,
2,3,,8/15/1943,MTO,12 AF,USA,13.0,ITALY,COSENZA,,...,,,,,,,,,,
3,4,,8/15/1943,MTO,12 AF,USA,13.0,ITALY,GIOJA TAURO,,...,,,,,,1.0,,,,
4,8167,14639.0,2/23/1945,PTO,5 AF,USA,,PHILIPPINE ISLANDS,BALETE PASS,WOODED AREA,...,,,,,,,,,,


In [22]:
# Print Thor Weather columns
thor_weather.columns

Index(['WWII_ID', 'MASTER_INDEX_NUMBER', 'MSNDATE', 'THEATER', 'NAF',
       'COUNTRY_FLYING_MISSION', 'TGT_COUNTRY_CODE', 'TGT_COUNTRY',
       'TGT_LOCATION', 'TGT_TYPE', 'TGT_ID', 'TGT_INDUSTRY_CODE',
       'TGT_INDUSTRY', 'SOURCE_LATITUDE', 'SOURCE_LONGITUDE', 'LATITUDE',
       'LONGITUDE', 'UNIT_ID', 'MDS', 'AIRCRAFT_NAME', 'MSN_TYPE',
       'TGT_PRIORITY', 'TGT_PRIORITY_EXPLANATION', 'AC_ATTACKING', 'ALTITUDE',
       'ALTITUDE_FEET', 'NUMBER_OF_HE', 'TYPE_OF_HE', 'LBS_HE', 'TONS_OF_HE',
       'NUMBER_OF_IC', 'TYPE_OF_IC', 'LBS_IC', 'TONS_OF_IC', 'NUMBER_OF_FRAG',
       'TYPE_OF_FRAG', 'LBS_FRAG', 'TONS_OF_FRAG', 'TOTAL_LBS', 'TOTAL_TONS',
       'TAKEOFF_BASE', 'TAKEOFF_COUNTRY', 'TAKEOFF_LATITUDE',
       'TAKEOFF_LONGITUDE', 'AC_LOST', 'AC_DAMAGED', 'AC_AIRBORNE',
       'AC_DROPPING', 'TIME_OVER_TARGET', 'SIGHTING_METHOD_CODE',
       'SIGHTING_EXPLANATION', 'BDA', 'CALLSIGN', 'ROUNDS_AMMO',
       'SPARES_RETURN_AC', 'WX_FAIL_AC', 'MECH_FAIL_AC', 'MISC_FAIL_AC',
       

In [23]:
# Organize Thor weather data to list location as well as aircraft failures
thor_weather_fails = thor_weather[['LATITUDE','LONGITUDE','MSNDATE', 'WX_FAIL_AC', 'MECH_FAIL_AC', 'MISC_FAIL_AC']].copy()
thor_weather_fails

Unnamed: 0,LATITUDE,LONGITUDE,MSNDATE,WX_FAIL_AC,MECH_FAIL_AC,MISC_FAIL_AC
0,38.220000,15.370000,8/15/1943,,,
1,9.750000,118.750000,2/20/1945,,,
2,39.270000,16.250000,8/15/1943,,,
3,38.430000,15.900000,8/15/1943,,,1.0
4,16.083333,120.916667,2/23/1945,,,
...,...,...,...,...,...,...
178276,-8.472085,148.234293,12/14/1942,,,
178277,4.883333,38.083333,9/18/1940,,,
178278,-8.471111,126.458333,11/17/1942,,,
178279,-2.583333,150.833333,2/16/1945,,,


In [24]:
# Drop rows that don't have any failed aircraft
# Print clean Thor weather data
thor_weather_clean = thor_weather_fails.dropna(thresh = 1, subset = ['WX_FAIL_AC', 'MECH_FAIL_AC', 'MISC_FAIL_AC'])
thor_weather_clean

Unnamed: 0,LATITUDE,LONGITUDE,MSNDATE,WX_FAIL_AC,MECH_FAIL_AC,MISC_FAIL_AC
3,38.43,15.90,8/15/1943,,,1.0
1237,39.27,16.25,8/19/1943,1.0,,
1838,40.67,14.73,8/22/1943,,,10.0
2139,39.80,16.20,8/24/1943,12.0,,
3810,40.07,15.63,8/31/1943,,1.0,
...,...,...,...,...,...,...
176197,38.02,14.60,8/7/1943,4.0,,
176490,38.02,15.12,8/8/1943,2.0,,
177685,38.22,15.37,8/13/1943,,,2.0
178019,38.43,15.90,8/14/1943,,,3.0


In [25]:
# Convert dates to proper formatting
weather_dates = weather_data_clean['Date'] = pd.to_datetime(weather_data_clean['Date'])

In [27]:
# Rename columns to make dataframes cleaner
weather_data_clean = weather_data_clean.rename(columns= {'Date':'Dates','MAX':'MaxTempF','MIN':'MinTempF', 'MaxTemp': 'MaxTempC', 'MinTemp': 'MinTempC', 'Precip':'Precipitation',})
weather_data_clean

Unnamed: 0,Dates,MaxTempC,MinTempC,MaxTempF,MinTempF,Precipitation,WindGustSpd,Snowfall,PoorWeather
0,1942-07-01,25.555556,22.222222,78.0,72.0,1.016,,0,
1,1942-07-02,28.888889,21.666667,84.0,71.0,0,,0,
2,1942-07-03,26.111111,22.222222,79.0,72.0,2.54,,0,
3,1942-07-04,26.666667,22.222222,80.0,72.0,2.54,,0,
4,1942-07-05,26.666667,21.666667,80.0,71.0,0,,0,
...,...,...,...,...,...,...,...,...,...
119035,1945-12-27,28.333333,18.333333,83.0,65.0,0,,0,
119036,1945-12-28,29.444444,18.333333,85.0,65.0,9.906,,0,1
119037,1945-12-29,28.333333,18.333333,83.0,65.0,0,,0,1
119038,1945-12-30,28.333333,18.333333,83.0,65.0,0,,0,


In [29]:
# Print updated clean Thor weather data
thor_weather_clean['MSNDATE'] = pd.to_datetime(thor_weather_clean['MSNDATE'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [30]:
thor_weather_clean = thor_weather_clean.rename(columns= {'LATITUDE':'Latitude','LONGITUDE':'Longitude','MSNDATE':'MissionDate', 'WX_FAIL_AC': 'WeatherFails', 'MECH_FAIL_AC': 'MechanicalFails', 'MISC_FAIL_AC':'MiscFails',})
thor_weather_clean

Unnamed: 0,Latitude,Longitude,MissionDate,WeatherFails,MechanicalFails,MiscFails
3,38.43,15.90,1943-08-15,,,1.0
1237,39.27,16.25,1943-08-19,1.0,,
1838,40.67,14.73,1943-08-22,,,10.0
2139,39.80,16.20,1943-08-24,12.0,,
3810,40.07,15.63,1943-08-31,,1.0,
...,...,...,...,...,...,...
176197,38.02,14.60,1943-08-07,4.0,,
176490,38.02,15.12,1943-08-08,2.0,,
177685,38.22,15.37,1943-08-13,,,2.0
178019,38.43,15.90,1943-08-14,,,3.0


In [7]:
#postgres_conn_string = f"postgres:{password}"
#engine = create_engine(f"postgresql://{postgres_conn_string}@localhost:5433/wwii_tables")

ModuleNotFoundError: No module named 'psycopg2'

In [8]:
#engine.table_names()

NameError: name 'engine' is not defined

In [None]:
#weather_data_clean_df.to_sql(event_id ='id', con=engine, if_exists='append', index=False)