# Cryptocurrency classifier

## import library

In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
# models import
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

## import csv

In [2]:
# Read the three per-coin address files in one pass; the tuple order fixes
# which frame lands in which name (BTC, ETH, TRX).
btc_data, eth_data, trx_data = map(
    pd.read_csv,
    (
        'dataset/btc_address.csv',
        'dataset/eth_address.csv',
        'dataset/trx_address.csv',
    ),
)

## print and check the data to make sure it loaded correctly

In [3]:
# Show the first five rows of each frame, one frame after another, to confirm
# that every file loaded with the expected `address` / `type` columns.
print(
    btc_data.head(),
    eth_data.head(),
    trx_data.head(),
    sep='\n',
)

                              address type
0  1FeexV6bAHb8ybZjqQMjJrcCrHGW9sb6uF  BTC
1  12tkqA9xSoowkzoERHMWNKsTey55YEBqkv  BTC
2  12ib7dApVFvg82TXKycWBNpN8kFyiAN1dr  BTC
3  1PeizMg76Cf96nUQrYg8xuoZWLQozU5zGW  BTC
4  198aMn6ZYAczwrE5NvNTUMyJ5qkfy4g3Hi  BTC
                                      address type
0  0x8ab7404063ec4dbcfd4598215992dc3f8ec853d7  ETH
1  0x1c74cff0376fb4031cd7492cd6db2d66c3f2c6b9  ETH
2  0x06af07097c9eeb7fd685c692751d5c66db49c215  ETH
3  0xc00e94cb662c3520282e6f5717214004a7f26888  ETH
4  0xb3319f5d18bc0d84dd1b4825dcde5d5f7266d407  ETH
                              address type
0  TNUC9Qb1rRpS5CbWLmNMxXBjyFoydXjWFR  TRX
1  TNMcQVGPzqH9ZfMCSY4PNrukevtDgp24dK  TRX
2  TPyjyZfsYaXStgz2NmAraF1uZcMtkgNan5  TRX
3  TNPdqto8HiuMzoG7Vv9wyyYhWzCojLeHAF  TRX
4  TWnHGFEjgrJpHXPiPnddPtdCwKGMwpxK8T  TRX


## concatenate the data and shuffle it

In [4]:
# Stack the three per-coin frames into one table with a fresh 0..n-1 index.
data = pd.concat([btc_data, eth_data, trx_data], ignore_index=True)
# Shuffle every row (frac=1). The fixed random_state makes the row order, and
# therefore the train/test split built from it, identical on every
# Restart & Run All.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

## split the data into features and labels, and vectorize the addresses

In [5]:
# Features are the raw address strings; labels are the coin tickers.
X = data['address']
y = data['type']

# An address is a single run of alphanumeric characters, so the default
# word-level analyzer would turn each address into exactly one token that is
# unique to that row: nothing learned from one address would carry over to
# another. Character 1- and 2-grams instead expose the prefix and alphabet
# of each address format.
# lowercase=False keeps upper and lower case distinct; the BTC and TRX samples
# are mixed-case, and folding case would discard that signal.
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 2), lowercase=False)
X = vectorizer.fit_transform(X)

## split data into train and test sets

In [6]:
# Hold out 20% of the rows for testing. stratify=y keeps each coin's share
# the same in the train and test sets; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## save the vectorizer

In [7]:
# joblib.dump does not create missing directories, so make sure the output
# folder exists first; otherwise this cell fails on a fresh checkout. The
# model-saving cells further down write into the same folder.
Path('model').mkdir(parents=True, exist_ok=True)

# Persist the fitted vectorizer so new addresses can be transformed exactly
# as the training data was.
joblib.dump(vectorizer, 'model/vectorizer.pkl')
print("Vectorizer has saved successfully")

Vectorizer has saved successfully


## train the model

### k-nearest neighbors

In [8]:
# k-nearest neighbours: each prediction is a vote among the 5 closest
# training vectors.
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X=X_train, y=y_train)

In [9]:
# Serialize the fitted k-NN model to disk, then confirm.
joblib.dump(
    value=knn_classifier,
    filename='model/knn_classifier.pkl',
)
print("Model has saved successfully")

Model has saved successfully


### naive bayes classifier

In [10]:
# Multinomial naive Bayes with scikit-learn's default settings, fitted on the
# TF-IDF training matrix.
mnb_classifier = MultinomialNB()
mnb_classifier.fit(X=X_train, y=y_train)

In [11]:
# Serialize the fitted naive Bayes model to disk, then confirm.
joblib.dump(
    value=mnb_classifier,
    filename='model/naive_bayes_classifier.pkl',
)
print("Model has saved successfully")

Model has saved successfully


### support vector classifier

In [12]:
# Support vector classifier with a linear kernel, fitted on the TF-IDF
# training matrix.
svc_classifier = SVC(kernel='linear')
svc_classifier.fit(X=X_train, y=y_train)

In [13]:
# Serialize the fitted support vector model to disk, then confirm.
joblib.dump(
    value=svc_classifier,
    filename='model/svc_classifier.pkl',
)
print("Model has saved successfully")

Model has saved successfully


### gradient boosting classifier

In [14]:
# Gradient-boosted trees with default hyper-parameters. random_state pins the
# estimator's internal randomness so refitting on the same data reproduces
# the same model (and the same saved pickle contents).
gbc_classifier = GradientBoostingClassifier(random_state=42)
gbc_classifier.fit(X_train, y_train)

In [15]:
# Serialize the fitted gradient boosting model to disk, then confirm.
joblib.dump(
    value=gbc_classifier,
    filename='model/gbc_classifier.pkl',
)
print("Model has saved successfully")

Model has saved successfully
