In [85]:
from sklearn import tree
import pandas as pd
import pickle
import os
from sqlalchemy import create_engine

In [86]:
# Connect to the DB Server and retrieve table, export data to CSV.
engine = create_engine('postgresql+psycopg2://Grant_Nick_Proj3:Grant_Nick_Proj3@grantnickproj3.ckbobg8otu11.ap-southeast-2.rds.amazonaws.com/Project_3')
df = pd.read_sql_query('select * from "incident_data"',con=engine)

# Export the Full Table to CSV
df.to_csv('final_data.csv')

df.head(1)

Unnamed: 0,mine_id,Operational_Process,Date,State,Resource_Mined,Underground_Method,Accident_Injury_Illness_Classification,Accident_Type,Injury_Count,Sex,...,Days_Lost,Description_1,Description_2,Experience_Total,Experience_Mine,Experience_Job,Injury_Classification,Operator_Contractor,Commodity_WorkStatus,General_Incident_Type
0,1200945,Dredging_Activities,13/10/2020,Indiana,Sand & gravel,0,Handling material,Caught in-under-between a moving and stationar...,1,Female,...,0.0,EE WAS MOVING A 2X3X.75 STEEL PLATE OFF OF A P...,HE TIME OF THE INCIDENT.,1.15,1.15,1.15,NDL (No days lost),Operator,Sand & gravel operator,Handling material


## Model 1 - Nature of Injury (NOI)

In [87]:
NOI_target = df["Nature_of_Injury"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [88]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, NOI_target)

In [89]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.28453267162944584

In [90]:
# This is how we predict and example based on default value
NOI_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Nature of Injury.csv", index=False)

# Export the model using pickle
with open('NOI_rf.pickle', 'wb') as file:
    pickle.dump(rf, file)

# Show the importance of the columns
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.001281905267490763,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0034961272590467024, 'Age_80_89'),
 (0.004177171758899482, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.004922364948695715, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.010769149613072147, 'Age_70_79'),
 (0.012433501701305399, 'Operational_Process_Other_Surface_Facility'),
 (0.013951900259587688, 'Occupation_Maritime_Roles'),
 (0.019406408500014623, 'Occupation_Explosives_Roles'),
 (0.02202516016194286, 'Age_Under_20'),
 (0.02258073218339834, 'Sex_Female'),
 (0.02350724308850091, 'Sex_Male'),
 (0.023721401441487388, 'Occupation_Other_Role'),
 (0.026227367704194458, 'Occupation_Driller_and_Support_Roles'),
 (0.027170402117076154, 'Operational_Process_Office_Located_on_Mine'),
 (0.03311825731422905, 'Occupation_Technical_Services_Roles'),
 (0.03343476872082854, 'Operational_Process_Dredging_Activities'),
 (0.03735823951574866, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 (0

## Model 2 - Injured Body Part (IBP)

In [91]:
IBP_target = df["Injured_Body_Part"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [92]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, IBP_target)

In [93]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.24152191894127378

In [94]:
# This is how we predict and example based on default value
IBP_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Injured Body Part.csv", index=False)

# Export the model using pickle
with open('IBP_rf.pickle', 'wb') as file:
    pickle.dump(rf, file)

# Show the importance of the columns
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.002196567341128169, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.003215635670287909,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0032625439344460207, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.005890543704925338, 'Age_80_89'),
 (0.009798298868049492, 'Age_70_79'),
 (0.012887052284241848, 'Operational_Process_Other_Surface_Facility'),
 (0.014515106166118272, 'Occupation_Maritime_Roles'),
 (0.0198385719529404, 'Occupation_Other_Role'),
 (0.02083489982993222, 'Age_Under_20'),
 (0.021379866129613206, 'Occupation_Explosives_Roles'),
 (0.02540763119792374, 'Operational_Process_Office_Located_on_Mine'),
 (0.03168133282995978, 'Sex_Female'),
 (0.03200701897512284, 'Sex_Male'),
 (0.032826542300152285, 'Operational_Process_Dredging_Activities'),
 (0.03344949251526011, 'Occupation_Driller_and_Support_Roles'),
 (0.034755618584935116, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 (0.03555302381252499, 'Occupation_Technical_Services_Roles'),
 (0.0

## Model 3 - Source of Injury (SOI)

In [95]:
SOI_target = df["Source_of_Injury"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [96]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, SOI_target)

In [97]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.13730355665839536

In [98]:
# This is how we predict and example based on default value
SOI_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Source of Injury.csv", index=False)

# Export the model using pickle
with open('SOI_rf.pickle', 'wb') as file:
    pickle.dump(rf, file)

# Show Model Value importances
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.0011893323253310687,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.0012665346591735158, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.004132131102649097, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.004674447335402862, 'Age_80_89'),
 (0.008772891564895809, 'Operational_Process_Other_Surface_Facility'),
 (0.010154866973879769, 'Age_70_79'),
 (0.011540378394515969, 'Occupation_Maritime_Roles'),
 (0.01577996446571124, 'Occupation_Explosives_Roles'),
 (0.01866845892270148, 'Operational_Process_Office_Located_on_Mine'),
 (0.0195171822180471, 'Occupation_Other_Role'),
 (0.021895159725816602, 'Age_Under_20'),
 (0.02297015182374637, 'Operational_Process_Dredging_Activities'),
 (0.024342967156125356, 'Occupation_Driller_and_Support_Roles'),
 (0.028166505120878628,
  'Operational_Process_Underground_Mine_Surface_Workshops_and_Yards'),
 (0.02997544328532091, 'Occupation_Technical_Services_Roles'),
 (0.030536436870898506, 'Sex_Male'),
 (0.03072590747549059, 

## Model 4 - Activity (ACT)

In [99]:
ACT_target = df["Activity"]
data = df[["Sex","Age", "Occupation", "Operational_Process"]]
data.head()

Unnamed: 0,Sex,Age,Occupation,Operational_Process
0,Female,20_29,Labourer_Roles,Dredging_Activities
1,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
2,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
3,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...
4,Female,30_39,Labourer_Roles,Crushing_or_Processing_Facility_Incl_Associate...


In [100]:
data_dummies = pd.get_dummies(data)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_dummies, ACT_target)

In [101]:
# Import Random Forrest, then provide a score for the model accuracy
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.23986765922249792

In [102]:
rf

RandomForestClassifier(n_estimators=200)

In [103]:
# This is how we predict and example based on default value
ACT_Prediction = rf.predict([[0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0]])

# Export the Output Headers to Excel
X_train.to_csv("Machine Learning Output - Activity.csv", index=False)

# Export the model using pickle
with open('ACT_rf.pickle', 'wb') as file:
    pickle.dump(rf, file)

# Show Model Value importances
sorted(zip(rf.feature_importances_,data_dummies.columns))

[(0.002862609654419233,
  'Operational_Process_Culm_Banks_Activities_Coal_Mining_Only'),
 (0.003145350621373169, 'Operational_Process_Non_Mine_Workshops_and_Yards'),
 (0.003488615292030177, 'Age_80_89'),
 (0.003522560265075817, 'Occupation_Apprentice_or_Trainee_Roles'),
 (0.009673963658326599, 'Occupation_Maritime_Roles'),
 (0.010368568454162957, 'Age_70_79'),
 (0.010641916145612114, 'Operational_Process_Other_Surface_Facility'),
 (0.017297099571582937, 'Occupation_Other_Role'),
 (0.017709053308921117, 'Occupation_Explosives_Roles'),
 (0.020325691283899057, 'Operational_Process_Dredging_Activities'),
 (0.021645289619716755, 'Operational_Process_Office_Located_on_Mine'),
 (0.02219850406535844, 'Occupation_Driller_and_Support_Roles'),
 (0.02350380182955751, 'Occupation_Technical_Services_Roles'),
 (0.02540638535431165, 'Sex_Female'),
 (0.025890054164410096, 'Age_Under_20'),
 (0.026485632582850218, 'Sex_Male'),
 (0.026584257582630322, 'Occupation_Mine_Supervisory_and_Management_Roles'),
 

# Summary of Prediction

In [104]:
# Summarise the model results
print(f"Model Predictions:")
print(f"")
print(f"Nature of injury: {NOI_Prediction[0]}")
print(f"Injured Body Part: {IBP_Prediction[0]}")
print(f"Source of Injury: {SOI_Prediction[0]}")
print(f"Activity: {ACT_Prediction[0]}")

Model Predictions:

Nature of injury: Fracture, chip
Injured Body Part: Wrist,_Hand_and_Fingers
Source of Injury: Steel rail (all sizes)
Activity: Handling supplies or material, load and unload
