In [114]:
from sklearn import tree
import pandas as pd
import pickle
import os
from sqlalchemy import create_engine

In [115]:
# Connect to the DB Server and retrieve table, export data to CSV.
engine = create_engine('postgresql+psycopg2://Grant_Nick_Proj3:Grant_Nick_Proj3@grantnickproj3.ckbobg8otu11.ap-southeast-2.rds.amazonaws.com/Project_3')
df = pd.read_sql_query('select * from "incident_data"',con=engine)

# Export the Full Table to CSV
df.to_csv('final_data.csv')

df.head(1)

Unnamed: 0,mine_id,Operational_Process,Date,State,Resource_Mined,Underground_Method,Accident_Injury_Illness_Classification,Accident_Type,Injury_Count,Sex,...,Days_Lost,Description_1,Description_2,Experience_Total,Experience_Mine,Experience_Job,Injury_Classification,Operator_Contractor,Commodity_WorkStatus,General_Incident_Type
0,1200945,Dredging_Activities,13/10/2020,Indiana,Sand & gravel,0,Handling material,Caught in-under-between a moving and stationar...,1,Female,...,0.0,EE WAS MOVING A 2X3X.75 STEEL PLATE OFF OF A P...,HE TIME OF THE INCIDENT.,1.15,1.15,1.15,NDL (No days lost),Operator,Sand & gravel operator,Handling material


## Model 1 - Nature of Injury (NOI)

In [116]:
NOI_target = df["Nature_of_Injury"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [117]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, NOI_target)

In [118]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.2985938792390405

In [119]:
# This is how we predict and example based on default value
NOI_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Nature of Injury.csv", index=False)

# Export the model using pickle
with open('NOI_Prediction.pickle', 'wb') as file:
    pickle.dump(NOI_Prediction, file)

# Show the importance of the columns
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.0012974610027545958,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.001457350953892474, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.003942551743829086, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.006008941808604606, 'Age_80_89'),
 (0.012439421799763489, 'Occupation_Maritime_Roles'),
 (0.013563202260442393, 'Operational_Process_Other_Surface_Facility'),
 (0.014277469956076386, 'Age_70_79'),
 (0.019543714144557835, 'Occupation_Explosives_Roles'),
 (0.021684890362059703, 'Occupation_Other_Role'),
 (0.02207992910326936, 'Sex_Male'),
 (0.023063869577865645, 'Occupation_Driller_and_Support_Roles'),
 (0.02464287919762742, 'Sex_Female'),
 (0.02704581000951362, 'Age_Under_20'),
 (0.027460520981766438, 'Operational_Process_Office_Located_on_Mine'),
 (0.027504693019149918, 'Operational_Process_Dredging_Activities'),
 (0.03286988678009553, 'Occupation_Technical_Services_Roles'),
 (0.03588742082206003, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 (

## Model 2 - Injured Body Part (IBP)

In [120]:
IBP_target = df["Injured_Body_Part"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [121]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, IBP_target)

In [122]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.24152191894127378

In [123]:
# This is how we predict and example based on default value
IBP_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Injured Body Part.csv", index=False)

# Export the model using pickle
with open('IBP_Prediction.pickle', 'wb') as file:
    pickle.dump(IBP_Prediction, file)

# Show the importance of the columns
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.0034558809424047854,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0044385439765227035, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.004488003743523332, 'Age_80_89'),
 (0.004523073761013337, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.008850047702066377, 'Age_70_79'),
 (0.012284772534751236, 'Operational_Process_Other_Surface_Facility'),
 (0.014969256713513132, 'Occupation_Maritime_Roles'),
 (0.01881055089260739, 'Age_Under_20'),
 (0.020519665872935175, 'Occupation_Other_Role'),
 (0.022389862320286953, 'Occupation_Explosives_Roles'),
 (0.02746372944960956, 'Occupation_Driller_and_Support_Roles'),
 (0.02807185757107113, 'Operational_Process_Office_Located_on_Mine'),
 (0.03279196504000914, 'Sex_Female'),
 (0.03293867665518075, 'Sex_Male'),
 (0.033376803690127, 'Operational_Process_Dredging_Activities'),
 (0.037053604374256165, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 (0.03753481505780833, 'Occupation_Technical_Services_Roles'),
 (0.0

## Model 3 - Source of Injury (SOI)

In [124]:
SOI_target = df["Source_of_Injury"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [125]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, SOI_target)

In [126]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.1348221670802316

In [127]:
# This is how we predict and example based on default value
SOI_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Source of Injury.csv", index=False)

# Export the model using pickle
with open('SOI_Prediction.pickle', 'wb') as file:
    pickle.dump(SOI_Prediction, file)

# Show Model Value importances
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.0009669451161756159,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0030908909944466563, 'Age_80_89'),
 (0.003181699966623286, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.003732098877745286, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.009725874797871622, 'Operational_Process_Other_Surface_Facility'),
 (0.012777589184586017, 'Age_70_79'),
 (0.013124683603418827, 'Occupation_Maritime_Roles'),
 (0.01712683138941108, 'Occupation_Explosives_Roles'),
 (0.01754536335662483, 'Occupation_Other_Role'),
 (0.019255394098748463, 'Operational_Process_Office_Located_on_Mine'),
 (0.02305352319621466, 'Operational_Process_Dredging_Activities'),
 (0.023881239652552278, 'Occupation_Driller_and_Support_Roles'),
 (0.025755351120581214, 'Age_Under_20'),
 (0.029686944397411347, 'Sex_Male'),
 (0.03116405431228769, 'Occupation_Technical_Services_Roles'),
 (0.03147575350100378, 'Sex_Female'),
 (0.03152861520395103,
  'Operational_Process_Underground_Mine_Surface_Worksho

## Model 4 - Activity (ACT)

In [128]:
ACT_target = df["Activity"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [129]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, ACT_target)

In [130]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.22249793217535152

In [131]:
# This is how we predict and example based on default value
ACT_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Activity.csv", index=False)

# Export the model using pickle
with open('ACT_Prediction.pickle', 'wb') as file:
    pickle.dump(ACT_Prediction, file)

# Show Model Value importances
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.0, 'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0009414841801496667, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.003359435468512724, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.006369677584232145, 'Age_80_89'),
 (0.010251651843119998, 'Occupation_Maritime_Roles'),
 (0.01043069518345463, 'Operational_Process_Other_Surface_Facility'),
 (0.01382157063352584, 'Age_70_79'),
 (0.015022449109384746, 'Occupation_Explosives_Roles'),
 (0.015863390860544127, 'Occupation_Other_Role'),
 (0.018845662789888036, 'Operational_Process_Office_Located_on_Mine'),
 (0.020966465774506046, 'Occupation_Driller_and_Support_Roles'),
 (0.023129123417539758, 'Age_Under_20'),
 (0.02321669917490591, 'Operational_Process_Dredging_Activities'),
 (0.024087618841354054, 'Occupation_Technical_Services_Roles'),
 (0.026649957518663024, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 (0.02809725015046778, 'Sex_Male'),
 (0.028439896943869957, 'Sex_Female'),
 (0.0284523498003413

# Summary of Prediction

In [132]:
# Import Pickle Files for predicted values
# Open the file in binary mode
with open('NOI_Prediction.pickle', 'rb') as file:
    NOI_Predictions = pickle.load(file)

with open('IBP_Prediction.pickle', 'rb') as file:
    IBP_Predictions = pickle.load(file)
    
with open('SOI_Prediction.pickle', 'rb') as file:
    SOI_Predictions = pickle.load(file)

with open('ACT_Prediction.pickle', 'rb') as file:
    ACT_Predictions = pickle.load(file)

# Summarise the model results
print(f"Model Predictions:")
print(f"")
print(f"Nature of injury: {NOI_Predictions[0]}")
print(f"Injured Body Part: {IBP_Predictions[0]}")
print(f"Source of Injury: {SOI_Predictions[0]}")
print(f"Activity: {ACT_Predictions[0]}")

Model Predictions:

Nature of injury: Fracture, chip
Injured Body Part: Wrist,_Hand_and_Fingers
Source of Injury: Boilers
Activity: Handling supplies or material, load and unload
