# MLP

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [3]:
df_real = pd.read_csv('../data/balanced_real_data.csv')
df_fake = pd.read_csv('../data/balanced_fake_data.csv')

In [4]:
df_real.head()

Unnamed: 0,username,profile_pic,biography,follows_count,followed_by_count,ff_ratio,media_count,is_private,is_verified,is_business_account,...,std_comments,var_comments,skw_comments,mean_time_between_posts,max_time_between_posts,min_time_between_posts,std_time_between_posts,var_time_between_posts,skw_time_between_posts,real_account
0,irinashayk,True,11,597,13156189,22037.167504,1592,False,True,False,...,878.448591,771671.926667,2.252054,207708.3,611811.0,31545.0,171989.4,29580360000.0,1.075537,True
1,dementieva_a______,True,0,423,192,0.453901,50,True,False,False,...,,,,,,,,,,True
2,lorenz.fini,True,0,426,151,0.35446,37,False,False,False,...,2.827248,7.993333,4.049417,2232875.0,7637239.0,92.0,2328249.0,5420744000000.0,0.828757,True
3,jessicagiulia,True,128,450,694,1.542222,1144,False,False,False,...,1.683251,2.833333,0.963009,126078.5,343781.0,83938.0,80450.28,6472248000.0,1.596275,True
4,il_socio_aci,True,60,1,327728,327728.0,392,False,False,True,...,123.279128,15197.743333,2.519276,328715.3,2955259.0,64159.0,582898.5,339770700000.0,4.056672,True


In [5]:
df_fake.head()

Unnamed: 0,username,profile_pic,biography,follows_count,followed_by_count,ff_ratio,media_count,is_private,is_verified,is_business_account,...,std_comments,var_comments,skw_comments,mean_time_between_posts,max_time_between_posts,min_time_between_posts,std_time_between_posts,var_time_between_posts,skw_time_between_posts,real_account
0,_s.a.v.a__v.l.a.d_,True,56,2321,747,0.321844,10,True,False,False,...,,,,,,,,,,False
1,pp_sport_julduz,True,147,4247,611,0.143866,99,False,False,True,...,0.678233,0.46,2.068805,80254.208333,1466742.0,10.0,297334.2,88407620000.0,4.488934,False
2,clark5.7lyfe,True,127,1764,1613,0.914399,64,False,False,False,...,10.224643,104.543333,3.196293,333885.0,836121.0,27637.0,224060.2,50202990000.0,0.519589,False
3,nata53149,True,0,68,65,0.955882,1,False,False,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
4,olgashatko.63,True,80,242,134,0.553719,80,False,False,False,...,0.6245,0.39,4.086591,694503.0,6477346.0,8.0,1489132.0,2217513000000.0,2.800966,False


In [6]:
df=pd.concat([df_real,df_fake],ignore_index=True)

In [7]:
print(df)

                  username  profile_pic  biography  follows_count  \
0               irinashayk         True         11            597   
1       dementieva_a______         True          0            423   
2              lorenz.fini         True          0            426   
3            jessicagiulia         True        128            450   
4             il_socio_aci         True         60              1   
...                    ...          ...        ...            ...   
15303           tanku26_02         True         31           1440   
15304      leyes.naturales         True          3           3040   
15305           dyakonov05         True          0           6638   
15306  jason_steven_kettle         True         66           4501   
15307           ramon.educ         True         45            695   

       followed_by_count       ff_ratio  media_count  is_private  is_verified  \
0               13156189   22037.167504         1592       False         True   
1        

In [8]:
# df.drop('Unnamed: 0', axis=1, inplace=True)

In [9]:
features_columns=list(df.columns[:30])
# targets_column=list(df.columns[30:31])
# df.head()
print(features_columns)

['username', 'profile_pic', 'biography', 'follows_count', 'followed_by_count', 'ff_ratio', 'media_count', 'is_private', 'is_verified', 'is_business_account', 'is_joined_recently', 'highlight_reel_count', 'average_likes', 'max_likes', 'min_likes', 'std_likes', 'var_likes', 'skw_likes', 'average_comments', 'max_comments', 'min_comments', 'std_comments', 'var_comments', 'skw_comments', 'mean_time_between_posts', 'max_time_between_posts', 'min_time_between_posts', 'std_time_between_posts', 'var_time_between_posts', 'skw_time_between_posts']


In [10]:
df.shape

(15308, 31)

In [11]:
# cl_f = {'pos': 1, 'neg': 0}
# df['Class'] = df['Class'].map(cl_f)

In [12]:
targets = df['real_account'].values
df=df.loc[:,'profile_pic':'highlight_reel_count']

In [13]:
targets

array([ True,  True,  True, ..., False, False, False])

In [14]:
# df.drop('real_account', axis=1, inplace=True)

In [15]:
df.head()

Unnamed: 0,profile_pic,biography,follows_count,followed_by_count,ff_ratio,media_count,is_private,is_verified,is_business_account,is_joined_recently,highlight_reel_count
0,True,11,597,13156189,22037.167504,1592,False,True,False,False,15
1,True,0,423,192,0.453901,50,True,False,False,False,0
2,True,0,426,151,0.35446,37,False,False,False,False,2
3,True,128,450,694,1.542222,1144,False,False,False,False,10
4,True,60,1,327728,327728.0,392,False,False,True,False,1


In [16]:
df_sc = StandardScaler().fit_transform(df)

In [17]:
x_train, x_test, y_train, y_test = train_test_split(df_sc, targets, test_size = 0.2, random_state = 12345)

In [18]:
mlp = MLPClassifier(hidden_layer_sizes=([15,10]),max_iter=500)

In [19]:
mlp.fit(x_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=[15, 10], learning_rate='constant',
              learning_rate_init=0.001, max_iter=500, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [20]:
mlp_pred = mlp.predict(x_test)

In [21]:
from sklearn.metrics import confusion_matrix, classification_report

In [22]:
print(confusion_matrix(y_test, mlp_pred))

[[1393  168]
 [ 167 1334]]


In [23]:
print(classification_report(y_test, mlp_pred))

              precision    recall  f1-score   support

       False       0.89      0.89      0.89      1561
        True       0.89      0.89      0.89      1501

    accuracy                           0.89      3062
   macro avg       0.89      0.89      0.89      3062
weighted avg       0.89      0.89      0.89      3062

