In [None]:
#Import supporting libraries

import pandas as pd
import numpy as np

In [None]:
# Load the dataset from its CSV file and preview the first rows

csv_path = '../input/smoke-detection-dataset/smoke_detection_iot.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
#checking the file size

df.shape

In [None]:
#checking file info

df.info()

In [None]:
# checking null values

df.isnull().sum()

In [None]:
# Drop 'Unnamed: 0' and 'UTC': neither is used as a predictor.
# The frame is reassigned (rather than dropped in place) and missing labels are
# ignored so that re-running this cell does not raise a KeyError.

df = df.drop(columns=['Unnamed: 0', 'UTC'], errors='ignore')

In [None]:
df.head(3)

In [None]:
#checking the parameters given

df.columns

In [None]:
#checking descriptive statistics for the dataset

df.describe()

In [None]:
#checking the value counts for target column

df['Fire Alarm'].value_counts()
# 1- Fire is there
# 0- No fire

## Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc_params

In [None]:

# Share of each target class.
# Label and colour are looked up from the class value itself, so they cannot
# get out of sync with the frequency order returned by value_counts().
class_counts = df['Fire Alarm'].value_counts()
class_style = {1: ('Fire', 'green'), 0: ('No Fire', 'red')}
plt.pie(
    class_counts,
    explode=[0.2] + [0] * (len(class_counts) - 1),  # pull out the first (largest) slice
    labels=[class_style[cls][0] for cls in class_counts.index],
    colors=[class_style[cls][1] for cls in class_counts.index],
    autopct='%1.1f%%',
)
plt.title('Fire Alarm')
plt.show()

In [None]:
sns.distplot(df['Temperature[C]'])

In [None]:
sns.distplot(df['Humidity[%]'])

In [None]:
sns.distplot(df['TVOC[ppb]'])

In [None]:
sns.distplot(df['eCO2[ppm]'])

In [None]:
df.corr()

In [None]:
plt.figure(figsize=(20,15))

sns.heatmap(df.corr(),annot=True)

In [None]:
# As multicollinearity found in NC1.0 and NC2.5, we can drop any one of these columns,also in between PM1.0 and PM2.5

In [None]:
df.corr()['Fire Alarm'].sort_values(ascending=False)

In [None]:
df.drop(columns = ['NC1.0','PM1.0'],axis = 1,inplace =True)

In [None]:
df.head()

In [None]:
# splitting into indepenent and dependent variable

X=df.drop(columns = ['Fire Alarm'])
X.head()

In [None]:
y=df['Fire Alarm']
y.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler
scale = MinMaxScaler()

In [None]:
X_scaled=pd.DataFrame(scale.fit_transform(X),columns=X.columns)
X_scaled.head()

In [None]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X_scaled,y,test_size=0.3,random_state = 0)

In [None]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()

In [None]:
y_train.value_counts()

In [None]:
x_train_smote, y_train_smote = smote.fit_resample(x_train, y_train)

In [None]:
y_train_smote.value_counts()

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fixed seed so the fitted forest (and every metric derived from it) is
# reproducible across kernel restarts.
model=RandomForestClassifier(random_state=0)
model.fit(x_train_smote, y_train_smote)
# Predictions on the held-out split and on the resampled training data
y_pred_test=model.predict(x_test)
y_pred_train=model.predict(x_train_smote)


In [None]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [None]:
test_acc = accuracy_score(y_test,y_pred_test)
train_acc = accuracy_score(y_train_smote,y_pred_train)


In [None]:
print('test_acc: ', test_acc)
print('train_acc: ', train_acc)


In [None]:
pd.crosstab(y_test,y_pred_test)

In [None]:
print(classification_report(y_test, y_pred_test))

In [None]:
#testing with random value

model.predict([[20,57.36,0,400,12306,18520,939.735,0,0,0,0]])

In [None]:
model.predict([[20.145,60.1,20,430,12471,19500,939.755,0.56,0.12,0.142,20]])

In [None]:
# saving the model
# The classifier expects MinMax-scaled inputs, so the fitted scaler is saved
# alongside it; both are needed to score new raw readings later.

import joblib
joblib.dump(scale,'scaler.pkl')
joblib.dump(model,'smoke.pkl')