In [7]:
#|default_exp app

In [124]:
#|default
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn import preprocessing
import pickle

In [66]:
#|default
df = pd.read_csv('postnataldata.csv', date_parser='Timestamp')

In [67]:
df.head(2)

Unnamed: 0,Timestamp,Age,Feeling sad or Tearful,Irritable towards baby & partner,Trouble sleeping at night,Problems concentrating or making decision,Overeating or loss of appetite,Feeling anxious,Feeling of guilt,Problems of bonding with baby,Suicide attempt
0,6/14/2022 20:02,35-40,Yes,Yes,Two or more days a week,Yes,Yes,Yes,No,Yes,Yes
1,6/14/2022 20:03,40-45,Yes,No,No,Yes,Yes,No,Yes,Yes,No


In [68]:
## Preprocessing
for x in df.columns:
    print(f'{x} ---> {df[x].nunique()}')

Timestamp ---> 90
Age ---> 5
Feeling sad or Tearful ---> 3
Irritable towards baby & partner ---> 3
Trouble sleeping at night ---> 3
Problems concentrating or making decision ---> 3
Overeating or loss of appetite ---> 3
Feeling anxious ---> 2
Feeling of guilt ---> 3
Problems of bonding with baby ---> 3
Suicide attempt ---> 3


In [69]:
df.dtypes

Timestamp                                    object
Age                                          object
Feeling sad or Tearful                       object
Irritable towards baby & partner             object
Trouble sleeping at night                    object
Problems concentrating or making decision    object
Overeating or loss of appetite               object
Feeling anxious                              object
Feeling of guilt                             object
Problems of bonding with baby                object
Suicide attempt                              object
dtype: object

In [70]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

In [71]:
categorical_columns = ['Feeling sad or Tearful', 'Irritable towards baby & partner', 'Trouble sleeping at night','Problems concentrating or making decision',
'Overeating or loss of appetite', 'Feeling anxious', 'Feeling of guilt', 'Problems of bonding with baby', 'Suicide attempt']

In [72]:
## Preprocessing
for x in categorical_columns:
    print(f'{x} ---> {df[x].unique()}')

Feeling sad or Tearful ---> ['Yes' 'No' 'Sometimes']
Irritable towards baby & partner ---> ['Yes' 'No' 'Sometimes' nan]
Trouble sleeping at night ---> ['Two or more days a week' 'No' 'Yes']
Problems concentrating or making decision ---> ['Yes' 'No' 'Often' nan]
Overeating or loss of appetite ---> ['Yes' 'No' 'Not at all']
Feeling anxious ---> ['Yes' 'No']
Feeling of guilt ---> ['No' 'Yes' 'Maybe' nan]
Problems of bonding with baby ---> ['Yes' 'Sometimes' 'No']
Suicide attempt ---> ['Yes' 'No' 'Not interested to say']


In [73]:
first_mapper = {"No": 0, "Yes" : 1, "Maybe": 3, 'Sometimes': 4}

In [112]:
def preprocessing(df):
    df_copy = df.copy()
    # Numericalize all categorical columns
    for x in df_copy[categorical_columns]:
        df_copy[x] = df_copy[x].str.capitalize().str.strip(' ')
        df_copy[x] = df_copy[x].map(mapper)
        
    # Age column feature engineering
    df_copy['Max Age'] = df_copy['Age'].str[:2].astype('int')  
    df_copy['Min Age'] = df_copy['Age'].str[3:].astype('int') 
    df_copy['Mean Age'] = (df_copy['Max Age'] +  df_copy['Min Age']) / 2
    
    # Extract date features from Timestamp
    df_copy['year'] = df_copy['Timestamp'].dt.year
    df_copy['month'] = df_copy['Timestamp'].dt.month
    df_copy['day'] = df_copy['Timestamp'].dt.day
    df_copy['hour'] = df_copy['Timestamp'].dt.hour
    df_copy['minute'] = df_copy['Timestamp'].dt.minute
    df_copy['second'] = df_copy['Timestamp'].dt.second
    
    df_copy.fillna(0, inplace=True)
    
    # Remove Unnecessary features
    df_copy = df_copy.drop(['Timestamp', 'Age'], axis=1)
    
    return df_copy

In [113]:
new_df = preprocessing(df)

In [114]:
new_df.head()

Unnamed: 0,Feeling sad or Tearful,Irritable towards baby & partner,Trouble sleeping at night,Problems concentrating or making decision,Overeating or loss of appetite,Feeling anxious,Feeling of guilt,Problems of bonding with baby,Suicide attempt,Max Age,Min Age,Mean Age,year,month,day,hour,minute,second
0,1.0,1.0,0.0,1.0,1.0,1,0.0,1.0,1.0,35,40,37.5,2022,6,14,20,2,0
1,1.0,0.0,0.0,1.0,1.0,0,1.0,1.0,0.0,40,45,42.5,2022,6,14,20,3,0
2,1.0,0.0,1.0,1.0,1.0,1,0.0,0.0,0.0,35,40,37.5,2022,6,14,20,4,0
3,1.0,1.0,1.0,1.0,0.0,1,3.0,0.0,0.0,35,40,37.5,2022,6,14,20,5,0
4,1.0,0.0,0.0,1.0,0.0,1,0.0,1.0,0.0,40,45,42.5,2022,6,14,20,6,0


In [115]:
X = new_df.drop('Feeling anxious', axis=1)
y = new_df.loc[:, 'Feeling anxious']

In [116]:
X.shape, y.shape

((1503, 17), (1503,))

In [121]:
rf = RandomForestClassifier()

In [122]:
rf.fit(X, y)

In [125]:
pickle.dump(rf, open('rf.pkl', 'wb'))

In [None]:
!ls

Dockerfile        PostPartum.ipynb  [1m[36mjob-10faecc1[m[m      requirements.txt
[1m[36mPostPartum[m[m        PostPartum.py     postnataldata.csv [1m[36mresults[m[m


In [131]:
with open('job-10faecc1/raw/QmPiMDqi2ZDX7JhoTWHY3rk61eKqZhzNdhQEYS52zF8BGJ/outputs/rf.pkl', 'rb') as f:
    modelll = pickle.load(f)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
modelll.predict(X)

array([1, 0, 1, ..., 0, 1, 1])

In [132]:
modelll