In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.neural_network import MLPClassifier

In [2]:
dataset = pd.read_csv('imdb.csv')
dataset.head()
dataset = dataset[:1000]

# Create Vectors

In [3]:
count = CountVectorizer(stop_words='english')
X = count.fit_transform(dataset['review'])

In [4]:
count_60 = CountVectorizer(stop_words='english', max_df = 0.6)
X_60 = count.fit_transform(dataset['review'])
count_70 = CountVectorizer(stop_words='english', max_df = 0.7)
X_70 = count.fit_transform(dataset['review'])
count_80 = CountVectorizer(stop_words='english', max_df = 0.8)
X_80 = count.fit_transform(dataset['review'])

count_10 = CountVectorizer(stop_words='english', min_df = 0.1)
X_10 = count.fit_transform(dataset['review'])
count_20 = CountVectorizer(stop_words='english', min_df = 0.2)
X_20 = count.fit_transform(dataset['review'])
count_30 = CountVectorizer(stop_words='english', min_df = 0.3)
X_30 = count.fit_transform(dataset['review'])

print(X_60.shape)
print(X_70.shape)
print(X_80.shape)

print(X_10.shape)
print(X_20.shape)
print(X_30.shape)

(1000, 17625)
(1000, 17625)
(1000, 17625)
(1000, 17625)
(1000, 17625)
(1000, 17625)


# Transform

In [5]:
tfidf = TfidfTransformer()

In [6]:
X_60_ = tfidf.fit_transform(X_60)
X_70_ = tfidf.fit_transform(X_70)
X_80_ = tfidf.fit_transform(X_80)

X_10_ = tfidf.fit_transform(X_10)
X_20_ = tfidf.fit_transform(X_20)
X_30_ = tfidf.fit_transform(X_30)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, dataset['sentiment'], test_size=0.33)

X_train_60, X_test_60, y_train_60, y_test_60 = train_test_split(X_60_, dataset['sentiment'], test_size=0.33)
X_train_70, X_test_70, y_train_70, y_test_70 = train_test_split(X_70_, dataset['sentiment'], test_size=0.33)
X_train_80, X_test_80, y_train_80, y_test_80 = train_test_split(X_80_, dataset['sentiment'], test_size=0.33)

X_train_10, X_test_10, y_train_10, y_test_10 = train_test_split(X_10_, dataset['sentiment'], test_size=0.33)
X_train_20, X_test_20, y_train_20, y_test_20 = train_test_split(X_20_, dataset['sentiment'], test_size=0.33)
X_train_30, X_test_30, y_train_30, y_test_30 = train_test_split(X_30_, dataset['sentiment'], test_size=0.33)


# Fiting

In [8]:
model = MLPClassifier(max_iter=300)
model.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=300, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [9]:
#fit max 60

model.fit(X_train_60, y_train_60)
print(model.loss_)

0.00456987714525693


In [10]:
#fit max 70

model.fit(X_train_70, y_train_70)
print(model.loss_)

0.00454576575820199


In [11]:
#fit max 80

model.fit(X_train_80, y_train_80)
print(model.loss_)

0.004690073450627163


In [12]:
# fit min 10

model.fit(X_train_10, y_train_10)
print(model.loss_)

0.004517502892796195


In [13]:
# fit min 20

model.fit(X_train_20, y_train_20)
print(model.loss_)

0.004665096428805899


In [14]:
# fit min 30

model.fit(X_train_30, y_train_30)
print(model.loss_)

0.004636190520788493


# Score

In [15]:
model.score(X_test, y_test)

0.9272727272727272

In [16]:
#Score max 60

model.score(X_test_60, y_test_60)

0.9242424242424242

In [17]:
#Score max 70

model.score(X_test_70, y_test_70)

0.9090909090909091

In [18]:
#Score max 80

model.score(X_test_80, y_test_80)

0.9121212121212121

In [19]:
#Score min 10

model.score(X_test_10, y_test_10)

0.896969696969697

In [20]:
#Score min 20

model.score(X_test_20, y_test_20)

0.9242424242424242

In [21]:
#Score min 30

model.score(X_test_30, y_test_30)

0.7727272727272727

In [22]:
model.loss_

0.004636190520788493

# Predict

In [23]:
predict = model.predict(X_test)
predict

array(['negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'negative', 'negative', 'positive', 'negative',
       'negative', 'negative', 'negative', 'negative', 'positive',
       'negative', 'negative', 'negative', 'positive', 'negative',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'positive', 'negative', 'negative', 'negative', 'positive',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'positive', 'positive', 'negative', 'positive',
       'negative', 'negative', 'positive', 'positive', 'positive',
       'negative', 'positive', 'positive', 'positive', 'negative',
       'negative', 'positive', 'negative', 'negative', 'positive',
       'negative', 'negative', 'positive', 'positive', 'positive',
       'positive', 'positive', 'negative', 'negative', 'negative',
       'negative', 'positive', 'positive', 'negative', 'positi

In [24]:
# predict max 60

predict_60 = model.predict(X_test_60)
predict_60

array(['positive', 'positive', 'negative', 'negative', 'negative',
       'negative', 'positive', 'negative', 'positive', 'negative',
       'positive', 'positive', 'negative', 'positive', 'positive',
       'positive', 'positive', 'negative', 'positive', 'positive',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'positive', 'positive', 'positive', 'positive', 'positive',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'positive', 'negative', 'positive',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'negative', 'negative', 'positive', 'negative', 'positive',
       'negative', 'negative', 'negative', 'positive', 'positive',
       'positive', 'negative', 'positive', 'negative', 'negative',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'positive', 'negative', 'negative', 'negative', 'negati

In [25]:
# predict max 70

predict_70 = model.predict(X_test_70)
predict_70

array(['negative', 'negative', 'negative', 'negative', 'positive',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'positive', 'positive', 'negative', 'negative', 'negative',
       'negative', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'positive', 'negative', 'negative',
       'positive', 'negative', 'positive', 'positive', 'positive',
       'positive', 'negative', 'positive', 'negative', 'negative',
       'negative', 'negative', 'positive', 'negative', 'positive',
       'positive', 'positive', 'positive', 'negative', 'positive',
       'negative', 'positive', 'negative', 'positive', 'negative',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'positive', 'negative', 'positive', 'negative', 'positive',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'negative', 'positive', 'negative', 'negative', 'negative',
       'negative', 'negative', 'positive', 'negative', 'negati

In [26]:
# predict max 80

predict_80 = model.predict(X_test_80)
predict_80

array(['negative', 'positive', 'negative', 'negative', 'negative',
       'negative', 'positive', 'positive', 'negative', 'positive',
       'negative', 'positive', 'negative', 'negative', 'positive',
       'negative', 'positive', 'positive', 'positive', 'positive',
       'negative', 'positive', 'positive', 'positive', 'positive',
       'positive', 'negative', 'positive', 'negative', 'positive',
       'positive', 'positive', 'negative', 'positive', 'negative',
       'positive', 'positive', 'negative', 'positive', 'positive',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'positive', 'negative', 'positive', 'negative',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'negative', 'positive', 'negative',
       'negative', 'negative', 'negative', 'negative', 'negati

In [27]:
# predict min 10

predict_10 = model.predict(X_test_10)
predict_10

array(['positive', 'negative', 'negative', 'positive', 'negative',
       'negative', 'positive', 'positive', 'positive', 'negative',
       'negative', 'negative', 'positive', 'negative', 'positive',
       'positive', 'negative', 'negative', 'positive', 'positive',
       'positive', 'positive', 'negative', 'negative', 'positive',
       'positive', 'positive', 'positive', 'positive', 'positive',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'negative', 'negative', 'negative', 'positive', 'positive',
       'positive', 'negative', 'negative', 'negative', 'negative',
       'positive', 'negative', 'positive', 'positive', 'positive',
       'positive', 'positive', 'positive', 'positive', 'positive',
       'negative', 'negative', 'positive', 'positive', 'positive',
       'negative', 'positive', 'positive', 'positive', 'positive',
       'negative', 'negative', 'negative', 'positive', 'positi

In [28]:
# predict min 20

predict_20 = model.predict(X_test_20)
predict_20

array(['positive', 'positive', 'positive', 'positive', 'positive',
       'negative', 'positive', 'positive', 'negative', 'negative',
       'negative', 'positive', 'negative', 'positive', 'positive',
       'negative', 'negative', 'positive', 'negative', 'positive',
       'positive', 'negative', 'positive', 'positive', 'positive',
       'positive', 'negative', 'negative', 'positive', 'negative',
       'negative', 'positive', 'negative', 'negative', 'positive',
       'positive', 'negative', 'positive', 'positive', 'positive',
       'positive', 'positive', 'positive', 'negative', 'positive',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'positive', 'positive', 'negative', 'negative', 'negative',
       'positive', 'positive', 'negative', 'positive', 'positive',
       'positive', 'positive', 'positive', 'positive', 'negative',
       'positive', 'negative', 'positive', 'negative', 'negative',
       'positive', 'positive', 'positive', 'negative', 'negati

In [29]:
# predict min 30

predict_30 = model.predict(X_test_30)
predict_30

array(['negative', 'positive', 'positive', 'positive', 'positive',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'positive', 'positive', 'negative', 'negative',
       'positive', 'positive', 'negative', 'negative', 'negative',
       'positive', 'negative', 'negative', 'positive', 'positive',
       'positive', 'positive', 'negative', 'negative', 'positive',
       'positive', 'negative', 'negative', 'positive', 'positive',
       'positive', 'positive', 'positive', 'negative', 'negative',
       'positive', 'negative', 'positive', 'positive', 'negative',
       'positive', 'negative', 'negative', 'positive', 'positive',
       'negative', 'negative', 'positive', 'negative', 'negative',
       'positive', 'negative', 'positive', 'positive', 'positive',
       'positive', 'positive', 'positive', 'negative', 'positive',
       'positive', 'positive', 'positive', 'positive', 'positive',
       'positive', 'negative', 'negative', 'negative', 'positi

# f1 score

In [30]:
from sklearn.metrics import f1_score
f1_score(y_test, list(predict), pos_label="positive")

0.9272727272727272

In [31]:
# f1 score max 60
f1_score(y_test_60, list(predict_60), pos_label="positive")

0.923076923076923

In [32]:
# f1 score max 70
f1_score(y_test_70, list(predict_70), pos_label="positive")

0.9142857142857143

In [33]:
# f1 score max 80
f1_score(y_test_80, list(predict_80), pos_label="positive")

0.911854103343465

In [34]:
# f1 score min 10
f1_score(y_test_10, list(predict_10), pos_label="positive")

0.9017341040462428

In [35]:
# f1 score min 20
f1_score(y_test_20, list(predict_20), pos_label="positive")

0.9295774647887324

In [36]:
# f1 score min 30
f1_score(y_test_30, list(predict_30), pos_label="positive")

0.7838616714697406