In [1]:
from datetime import date
from urllib.parse import quote_plus

import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sqlalchemy import create_engine

from config import db_password


In [2]:
# Load the cleaned extruder, tensile and weather CSV exports.
# `path` is the shared folder that holds all three files.
path = '../Resources/Clean Data/'

extruder_df, tensile_df, weather_df = (
    pd.read_csv(path + file_name)
    for file_name in ('final_extruder.csv', 'final_tensile.csv', 'final_weather.csv')
)

In [3]:
# Rename the extruder's 'DateCode_Full' column to 'date_code' so it shares a
# key name with the tensile data it is merged with later.
extruder_df = extruder_df.rename({'DateCode_Full': 'date_code'}, axis='columns')
extruder_df

Unnamed: 0,DateTime_Stamp,Machine,date_code,Operator,RunFootage,LengthOffset,Z1,Z2,Z3,Z4,...,CS_Speed_Real,Nip_Speed_Real,Screw_Speed_Real,Back_Pressure_Real,Melt_Temp_Real,Chiller_Temp_Real,Motor_Amps_Real,TotalRunFootage,date,width
0,2022-03-01 15:53:00,1,22-A-100,2167,1250,500,178.1,188.0,189.9,190.1,...,8.802,8.835,22.308,1588,202.3,14.1,194.592,750,2022-03-01,600
1,2022-03-01 15:26:00,1,22-A-100,2167,1250,250,178.3,188.1,190.1,189.9,...,8.797,8.802,22.342,1585,202.0,14.0,192.766,1000,2022-03-01,600
2,2022-03-01 14:13:00,1,22-A-99,2238,1250,500,178.8,189.0,189.9,189.9,...,8.802,8.810,22.374,1588,201.5,13.6,191.319,750,2022-03-01,600
3,2022-03-01 13:45:00,1,22-A-99,2238,1250,250,178.9,189.7,189.9,189.9,...,8.800,8.764,22.349,1578,201.5,13.8,188.093,1000,2022-03-01,600
4,2022-03-01 13:28:00,1,22-A-99,2238,1250,1000,178.2,189.1,189.9,189.9,...,8.806,8.825,22.335,1572,201.7,13.6,188.235,250,2022-03-01,600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7661,2014-07-23 18:23:00,2,14-B-1653,1251,28000,250,175.0,200.0,199.2,204.7,...,5.478,5.580,48.027,1331,206.3,3.4,230.424,27750,2014-07-23,700
7662,2014-07-23 15:59:00,2,14-B-1650,1251,28000,250,176.7,201.4,200.9,204.9,...,5.513,5.580,48.021,1361,208.0,2.5,234.302,27750,2014-07-23,700
7663,2014-07-23 15:59:00,2,14-B-1651,1251,28000,250,176.7,201.4,200.9,204.9,...,5.513,5.580,48.021,1361,208.0,2.5,234.302,27750,2014-07-23,700
7664,2014-07-07 19:52:00,2,14-B-1479,1548,48000,250,176.3,200.7,201.9,205.3,...,5.650,5.650,51.013,1888,206.0,4.9,236.668,47750,2014-07-07,700


In [4]:
# Preview the first five rows of the cleaned tensile results.
tensile_df.head(n=5)

Unnamed: 0,date_code,thickness,amb_tensile_pre_cure,amb_elongation_pre_cure,amb_elongation_result,amb_tensile_result,hot_tensile_result,hot_elongation_result,overall_result
0,17-B-1045,65.0,23.9,610.0,1,1,1,1,1
1,17-B-1046,65.0,24.6,640.0,1,1,1,1,1
2,17-B-1047,65.0,25.7,701.0,1,1,1,1,1
3,17-B-1048,65.0,23.8,648.0,1,1,1,1,1
4,17-B-1049,65.0,23.2,599.0,1,1,1,1,1


In [5]:
# Attach the tensile results to each extruder reading via the shared
# 'date_code' key. Inner join: readings without a tensile record are dropped.
extruder_tensile_df = extruder_df.merge(tensile_df, how='inner', on='date_code')
extruder_tensile_df.head()

Unnamed: 0,DateTime_Stamp,Machine,date_code,Operator,RunFootage,LengthOffset,Z1,Z2,Z3,Z4,...,date,width,thickness,amb_tensile_pre_cure,amb_elongation_pre_cure,amb_elongation_result,amb_tensile_result,hot_tensile_result,hot_elongation_result,overall_result
0,2022-02-28 23:01:00,1,22-A-91,2223,1250,500,178.0,188.1,189.9,189.9,...,2022-02-28,700,65.0,27.7,740.0,1,1,1,1,1
1,2022-02-28 22:32:00,1,22-A-91,2167,1250,250,177.9,188.2,190.1,190.0,...,2022-02-28,700,65.0,27.7,740.0,1,1,1,1,1
2,2022-02-28 21:14:00,1,22-A-90,2167,1250,500,177.7,188.0,190.0,190.0,...,2022-02-28,700,65.0,18.1,595.0,1,1,1,1,1
3,2022-02-28 20:45:00,1,22-A-90,2167,1250,250,178.2,188.2,190.0,190.1,...,2022-02-28,700,65.0,18.1,595.0,1,1,1,1,1
4,2022-02-28 19:30:00,1,22-A-89,2167,1250,500,178.0,188.3,190.1,190.0,...,2022-02-28,700,65.0,17.2,556.0,1,1,1,1,1


In [6]:
# Attach the weather features to the extruder/tensile rows via the shared
# 'date' column. Inner join: rows without a matching weather date are dropped.
full_merge_df = extruder_tensile_df.merge(weather_df, how='inner', on='date')
full_merge_df

Unnamed: 0,DateTime_Stamp,Machine,date_code,Operator,RunFootage,LengthOffset,Z1,Z2,Z3,Z4,...,amb_elongation_result,amb_tensile_result,hot_tensile_result,hot_elongation_result,overall_result,month,ten_day_mean,ten_day_max,ten_day_min,ten_day_swing
0,2022-02-28 23:01:00,1,22-A-91,2223,1250,500,178.0,188.1,189.9,189.9,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
1,2022-02-28 22:32:00,1,22-A-91,2167,1250,250,177.9,188.2,190.1,190.0,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
2,2022-02-28 21:14:00,1,22-A-90,2167,1250,500,177.7,188.0,190.0,190.0,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
3,2022-02-28 20:45:00,1,22-A-90,2167,1250,250,178.2,188.2,190.0,190.1,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
4,2022-02-28 19:30:00,1,22-A-89,2167,1250,500,178.0,188.3,190.1,190.0,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2572,2017-04-30 12:40:00,2,17-B-1045,1546,16000,1000,179.3,224.8,220.1,210.8,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2573,2017-04-30 11:00:00,2,17-B-1045,1546,16000,250,181.0,224.9,220.1,208.8,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2574,2017-04-30 10:09:00,2,17-B-1045,1546,16000,250,182.3,224.7,219.4,208.7,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2575,2017-04-30 04:20:00,2,17-B-1045,1580,16000,250,180.3,238.6,234.0,234.5,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1


In [7]:
# Sanity check: list every column of the fully merged frame so the extruder,
# tensile and weather fields can be confirmed before dropping/encoding.
full_merge_df.columns

Index(['DateTime_Stamp', 'Machine', 'date_code', 'Operator', 'RunFootage',
       'LengthOffset', 'Z1', 'Z2', 'Z3', 'Z4', 'Z5', 'Z6', 'Z7', 'Z8', 'Z9',
       'Z10', 'Z11', 'Z12', 'Z13', 'Z14', 'Z15', 'Z16', 'Z17', 'Z18', 'Z19',
       'CST_TOP_Real', 'CST_Centre_Real', 'CST_Bottom_Real', 'CS_Speed_Real',
       'Nip_Speed_Real', 'Screw_Speed_Real', 'Back_Pressure_Real',
       'Melt_Temp_Real', 'Chiller_Temp_Real', 'Motor_Amps_Real',
       'TotalRunFootage', 'date', 'width', 'thickness', 'amb_tensile_pre_cure',
       'amb_elongation_pre_cure', 'amb_elongation_result',
       'amb_tensile_result', 'hot_tensile_result', 'hot_elongation_result',
       'overall_result', 'month', 'ten_day_mean', 'ten_day_max', 'ten_day_min',
       'ten_day_swing'],
      dtype='object')

In [8]:
# Drop the date/timestamp/date-code columns; they were only needed as join
# keys and are not carried forward into the encoded dataset.
date_like_cols = ['date', 'DateTime_Stamp', 'date_code']
no_dates_df = full_merge_df.drop(date_like_cols, axis='columns')
no_dates_df

Unnamed: 0,Machine,Operator,RunFootage,LengthOffset,Z1,Z2,Z3,Z4,Z5,Z6,...,amb_elongation_result,amb_tensile_result,hot_tensile_result,hot_elongation_result,overall_result,month,ten_day_mean,ten_day_max,ten_day_min,ten_day_swing
0,1,2223,1250,500,178.0,188.1,189.9,189.9,185.0,188.5,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
1,1,2167,1250,250,177.9,188.2,190.1,190.0,183.2,188.6,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
2,1,2167,1250,500,177.7,188.0,190.0,190.0,184.0,184.3,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
3,1,2167,1250,250,178.2,188.2,190.0,190.1,184.4,184.4,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
4,1,2167,1250,500,178.0,188.3,190.1,190.0,182.8,183.7,...,1,1,1,1,1,2,-7.500000,11.9,-26.3,38.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2572,2,1546,16000,1000,179.3,224.8,220.1,210.8,202.5,200.2,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2573,2,1546,16000,250,181.0,224.9,220.1,208.8,202.2,204.5,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2574,2,1546,16000,250,182.3,224.7,219.4,208.7,201.8,203.5,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1
2575,2,1580,16000,250,180.3,238.6,234.0,234.5,221.6,229.3,...,1,1,1,1,1,4,5.327273,15.2,-2.9,18.1


In [9]:
# One-hot encode the categorical Machine and Operator identifiers.
categorical_cols = ['Machine', 'Operator']

# Keep the encoder's default (sparse) output and densify explicitly. The
# `sparse=` keyword was renamed to `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4, so not passing either keyword works on every version.
enc = OneHotEncoder()
encoded_values = enc.fit_transform(no_dates_df[categorical_cols]).toarray()

# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer `get_feature_names_out` and fall back only on older installs.
feature_namer = getattr(enc, 'get_feature_names_out', None) or enc.get_feature_names

# Reuse the source frame's index so the later index-based merge lines rows up
# by construction instead of relying on both frames having a 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_values,
    columns=feature_namer(categorical_cols),
    index=no_dates_df.index,
)
encoded_df.head()

Unnamed: 0,Machine_1,Machine_2,Operator_0,Operator_1218,Operator_1251,Operator_1324,Operator_1476,Operator_1546,Operator_1580,Operator_1731,...,Operator_2167,Operator_2178,Operator_2198,Operator_2201,Operator_2218,Operator_2223,Operator_2238,Operator_2262,Operator_2275,Operator_2469
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Append the one-hot columns to the feature frame (aligned on the row index),
# then drop the raw Machine/Operator columns they replace.
with_encoded_df = pd.merge(no_dates_df, encoded_df, left_index=True, right_index=True)
full_encoded_df = with_encoded_df.drop(['Machine', 'Operator'], axis='columns')
full_encoded_df.head()

Unnamed: 0,RunFootage,LengthOffset,Z1,Z2,Z3,Z4,Z5,Z6,Z7,Z8,...,Operator_2167,Operator_2178,Operator_2198,Operator_2201,Operator_2218,Operator_2223,Operator_2238,Operator_2262,Operator_2275,Operator_2469
0,1250,500,178.0,188.1,189.9,189.9,185.0,188.5,195.6,195.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1250,250,177.9,188.2,190.1,190.0,183.2,188.6,195.5,194.8,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1250,500,177.7,188.0,190.0,190.0,184.0,184.3,195.3,195.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1250,250,178.2,188.2,190.0,190.1,184.4,184.4,195.2,195.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1250,500,178.0,188.3,190.1,190.0,182.8,183.7,195.2,194.9,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Export the prepped, fully encoded dataset to CSV.
# Build the destination from the shared `path` (the Clean Data folder the
# inputs were read from) rather than repeating the directory literal, so the
# data location is defined in one place. The row index is not written.
full_encoded_df.to_csv(f'{path}encoded_data.csv', index=False)

In [12]:
# connect to PostgreSQL
# Percent-encode the password before embedding it in the URL: characters such
# as '@', ':', '/' or '%' would otherwise be read as URL delimiters and the
# connection string would be parsed incorrectly.
# NOTE(review): assumes `db_password` in config is the raw (not already
# URL-encoded) password — confirm.
db_string = f"postgresql://postgres:{quote_plus(db_password)}@127.0.0.1:5432/polypropylene_analysis_db"
engine = create_engine(db_string)

In [13]:
# Write the encoded dataset to the 'encoded_raw_data' table in PostgreSQL.
# An existing table of that name is replaced, and the DataFrame index is
# written as a column (pandas' default, spelled out here).
full_encoded_df.to_sql(
    'encoded_raw_data',
    engine,
    if_exists='replace',
    index=True,
)