# Cryptocurrency classifier

## import libraries

In [3]:
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

## import csv

In [None]:
# Load the three per-coin address tables, in BTC / ETH / TRX order.
csv_paths = (
    'dataset/btc_address.csv',
    'dataset/eth_address.csv',
    'dataset/trx_address.csv',
)
btc_data, eth_data, trx_data = map(pd.read_csv, csv_paths)

## print the first rows to check the data

In [None]:
# Show the first rows of each frame as a quick sanity check of the loaded data.
for frame in (btc_data, eth_data, trx_data):
    print(frame.head())

## concatenate the data and shuffle it

In [None]:
# Stack the three per-coin frames into a single table with a fresh index.
data = pd.concat([btc_data, eth_data, trx_data], ignore_index=True)

# Shuffle every row. The seed is fixed so the row order is the same on each
# run; with an unseeded shuffle the later train/test split could not be
# reproduced even though it is given its own random_state.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

## split the data into features and labels, and vectorize the addresses

In [None]:
# Features are the raw address strings; labels are the coin type.
X = data['address']
y = data['type']

# Use character n-grams rather than the default word analyzer. Assuming each
# address is a single unbroken alphanumeric string, word tokenisation would
# map every address to one unique token, so addresses not seen during fitting
# would get an all-zero vector and the classifier could not generalise.
# lowercase=False keeps upper/lower case distinct, since case is presumably
# meaningful in these address encodings.
# NOTE(review): the vectorizer is fitted on the full dataset before the
# train/test split, so test rows influence the vocabulary and IDF weights.
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), lowercase=False)
X = vectorizer.fit_transform(X)

## split data into train and test sets

In [None]:
# Hold out 20% of the samples as the test set, using a fixed seed of 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## train the model

In [None]:
# Build a support vector classifier with a linear kernel, then fit it on the
# training split.
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X=X_train, y=y_train)

## Save the model and vectorizer

In [None]:
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing the artifacts.
os.makedirs('model', exist_ok=True)

# Persist the trained classifier together with the fitted vectorizer; both are
# needed later to transform and classify new addresses.
joblib.dump(svm_classifier, 'model/svm_classifier.pkl')
joblib.dump(vectorizer, 'model/vectorizer.pkl')
print("Model and vectorizer has saved successfully")