# Document

In [4]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.datasets import fetch_20newsgroups

# The four newsgroups used as class labels in this experiment.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

# Parts of each post handed to `remove=` so they are stripped from the text.
remove = ('headers', 'footers', 'quotes')

# Fetch the train and test subsets with identical settings; only `subset`
# differs between the two calls.
twenty_train, twenty_test = (
    fetch_20newsgroups(subset=subset, remove=remove, categories=categories)
    for subset in ('train', 'test')
)

In [2]:
# Bag-of-words baseline: raw term counts fed to a linear SVM.
count_vect = CountVectorizer()
# Fit the vocabulary on the training posts only, then reuse it unchanged
# for the test posts so both matrices share the same columns.
X_train_counts = count_vect.fit_transform(twenty_train.data)
X_test_count = count_vect.transform(twenty_test.data)

# LinearSVC's solver uses a random number generator; pinning random_state
# makes the reported accuracy reproducible across kernel restarts.
model = LinearSVC(max_iter=20000, random_state=42)
model.fit(X_train_counts, twenty_train.target)
predicted = model.predict(X_test_count)

# Accuracy: fraction of test posts whose predicted label matches the target.
print(np.mean(predicted == twenty_test.target))

0.7423435419440746


In [5]:
# TF-IDF variant of the same experiment: term weights instead of raw counts.
tf_vec = TfidfVectorizer()
# Fit the vocabulary and IDF weights on the training posts only, then apply
# the fitted transformer to the test posts.
X_train_tfidf = tf_vec.fit_transform(twenty_train.data)
X_test_tfidf = tf_vec.transform(twenty_test.data)

# LinearSVC's solver uses a random number generator; pinning random_state
# makes the reported accuracy reproducible across kernel restarts.
model = LinearSVC(max_iter=20000, random_state=42)
model.fit(X_train_tfidf, twenty_train.target)

predicted = model.predict(X_test_tfidf)
# Accuracy: fraction of test posts whose predicted label matches the target.
print(np.mean(predicted == twenty_test.target))

0.8149134487350199


# Image

In [13]:
from PIL import Image
import numpy as np

# Open the file inside a context manager so its handle is closed as soon as
# the 8-bit grayscale ('L') copy has been created.
with Image.open('zero_image.png') as source_image:
    img = source_image.convert('L')

width, height = img.size

# Converting the image to an array and flattening it row by row yields the
# pixels in the same order as looping over y, then x, with getpixel -- but
# without one Python-level call per pixel. tolist() returns plain ints.
img_pixels = np.asarray(img).ravel().tolist()

# Sanity check: exactly one value per pixel.
assert len(img_pixels) == width * height

print(img_pixels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [14]:
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
digits = datasets.load_digits()
n_samples = len(digits.images)
# Flatten every image into a single feature row: (n_samples, n_features).
data = digits.images.reshape((n_samples, -1))

# First half of the samples trains the model, second half evaluates it.
split = n_samples // 2

# A random forest is stochastic; pinning random_state makes the report
# below reproducible across kernel restarts.
model = RandomForestClassifier(n_estimators=10, random_state=42)
model.fit(data[:split], digits.target[:split])

expected = digits.target[split:]
predicted = model.predict(data[split:])

# Per-class precision / recall / F1 on the held-out half.
print(metrics.classification_report(expected, predicted))

              precision    recall  f1-score   support

           0       0.94      0.99      0.96        88
           1       0.85      0.86      0.85        91
           2       0.90      0.91      0.90        86
           3       0.86      0.89      0.88        91
           4       0.90      0.86      0.88        92
           5       0.88      0.84      0.86        91
           6       0.92      0.97      0.94        91
           7       0.93      0.90      0.91        89
           8       0.87      0.74      0.80        88
           9       0.79      0.88      0.84        92

    accuracy                           0.88       899
   macro avg       0.88      0.88      0.88       899
weighted avg       0.88      0.88      0.88       899

