<a href="https://colab.research.google.com/github/dkurbatovv/Python/blob/main/Abalone_Age_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, confusion_matrix, accuracy_score, log_loss, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression

In [None]:
# Column names applied to abalone.data, which is read without a header row.
# Order must match the column order of the file.
columns_name = [
    'Sex',
    'Length',
    'Diameter',
    'Height',
    'Whole weight',
    'Shucked weight',
    'Viscera weight',  # was misspelled 'Viscera weigh'
    'Shell weight',
    'Rings',
]

In [None]:
# Load the raw file; it has no header row, so the names come from columns_name.
df = pd.read_csv(
    'abalone.data',
    header=None,
    names=columns_name,
)

In [None]:
# Preview the first rows to check that the assigned column names line up with the data.
df.head()

In [None]:
# Column dtypes and non-null counts for the loaded frame.
df.info()

In [None]:
# Number of missing values per column (isna is the canonical name for isnull).
df.isna().sum()

In [None]:
# dropna() returns a new frame instead of modifying df, so the result has to be
# assigned back; the bare call previously discarded it (and dumped the whole
# frame as cell output).
df = df.dropna()
# Show the remaining (rows, columns) instead of the full table.
df.shape

In [None]:
# Bar chart of how many rows fall into each 'Sex' category, drawn on a new
# 12x10 inch figure (passed explicitly instead of relying on the current axes).
sns.countplot(
    x='Sex',
    data=df,
    palette='Set1',
    ax=plt.figure(figsize=(12, 10)).gca(),
)

In [None]:
# One histogram per numeric column (50 bins each) to inspect the distributions.
df.hist(bins = 50, figsize = (12,10))
# Render the figure and suppress the array-of-axes repr.
plt.show()

In [None]:
# Derive the regression target: age is taken as the ring count plus 1.5.
# NOTE(review): the 1.5 offset presumably follows the dataset description
# (age in years) — confirm against the data source.
df['age'] = 1.5 + df['Rings']

In [None]:
# Row counts per age value, split by 'Sex', on a new 12x8 inch figure.
# The palette supplies one colour per 'Sex' category.
sn = sns.countplot(
    data=df,
    x='age',
    hue='Sex',
    palette=['pink', 'crimson', "Yellow"],
    ax=plt.figure(figsize=(12, 8)).gca(),
)

In [None]:
# Replace the 'Sex' labels with integer codes so the estimators can consume them.
# The fitted encoder is kept in `le` so codes can be mapped back via le.classes_.
# NOTE(review): integer codes impose an ordering on a nominal feature; one-hot
# encoding may be more appropriate for the linear models below.
le = LabelEncoder().fit(df['Sex'])
df['Sex'] = le.transform(df['Sex'])

In [None]:
# Re-check the frame after adding 'age' and integer-encoding 'Sex'.
df.head()

In [None]:
# Modelling frame without 'Rings': 'age' is computed directly from 'Rings',
# so keeping it as a feature would give the target away.
new_df = df.drop(columns=['Rings'])

In [None]:
# Confirm 'Rings' is gone and 'age' is present in the modelling frame.
new_df.head()



In [None]:
# Linear regression (scikit-learn defaults) used to predict the continuous 'age'.
model = LinearRegression()

In [None]:
# Regression design matrix (every column except the target) and target vector.
X, y = new_df.drop(columns=['age']), new_df['age']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the linear regression on the training split only.
model.fit(X_train, y_train)

In [None]:
# Predictions for the *training* rows (not the test rows).
y_pred = model.predict(X_train)

In [None]:
# Training-split fit quality. For a regressor, model.score returns R^2, so both
# lines report the same quantity (the label "Accuracy" here means R^2).
print('The Accuracy  on the training dataset is: ', model.score(X_train, y_train) )
print('The Accuracy r2  on the training dataset is: ',r2_score(y_train,y_pred) )   

In [None]:
# Held-out fit quality. The scores are computed on X_test / y_test, so the
# messages must say "testing" (they previously said "training").
# For a regressor, model.score returns R^2, so both lines report the same quantity.
print('The Accuracy  on the testing dataset is: ', model.score(X_test, y_test))
print('The Accuracy r2  on the testing dataset is: ', r2_score(y_test, model.predict(X_test)))

In [None]:
# Summary statistics per numeric column, including the derived 'age'.
df.describe()

In [None]:
# Binary class label derived from 'age': '1' when age > 11.4, otherwise '0'.
# Labels are kept as strings.
# NOTE(review): 11.4 is presumably taken from the df.describe() summary
# (e.g. near the mean age) — confirm and consider naming the constant.
Age = ['1' if age_value > 11.4 else '0' for age_value in df['age']]
new_df['Age'] = Age

# Display only: drop() returns a new frame and the result is not assigned,
# so new_df itself still contains the 'age' column after this cell.
new_df.drop(columns=['age'])

In [None]:
# Features for the classifier. Both 'Age' (the label) and 'age' must be removed:
# 'Age' is computed from 'age' by a simple threshold, and new_df still carries
# the 'age' column, so leaving it in X_new leaks the target into the features.
X_new = new_df.drop(columns=['age', 'Age'])
y_new = new_df['Age']

# Same 80/20 split and seed as the regression experiment.
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
    X_new, y_new, test_size=0.2, random_state=42
)

In [None]:
# Logistic-regression classifier with scikit-learn defaults for the binary 'Age' label.
lr = LogisticRegression()

In [None]:
# Fit the classifier on the training split only.
lr.fit(X_train_new, y_train_new)

In [None]:
# Predicted class labels for the *training* rows.
lr_pred = lr.predict(X_train_new)

In [None]:
# Predicted class probabilities for the training rows, one column per class in lr.classes_.
# Not referenced again in the visible cells.
lr_pred_proba = lr.predict_proba(X_train_new)

In [None]:
# Fitted coefficients of the logistic model, one per feature column of X_train_new.
lr.coef_

In [None]:
# Accuracy on the training split (for a classifier, lr.score is mean accuracy)
print("The Training Accuracy is: ", lr.score(X_train_new, y_train_new))

# Accuracy on the held-out test split
print("The Testing Accuracy is: ", lr.score(X_test_new, y_test_new))


# Classification report for the *training* split: lr_pred holds the
# predictions made on X_train_new, so this is not a held-out evaluation.
print(classification_report(y_train_new, lr_pred))

In [None]:
# Confusion Matrix function

def plot_confusion_matrix(cm, classes=None, title='Confusion matrix'):
    """Draw ``cm`` as a seaborn heatmap and label the plot.

    The colour scale is pinned to the range [0, 1], so ``cm`` is expected to
    hold proportions (for example a row-normalised confusion matrix) rather
    than raw counts.

    Args:
        cm: 2-D array-like of values to render.
        classes: Optional tick labels used for both axes. When given, the
            "YlGnBu" colormap is used and every cell is annotated with its
            value; when ``None`` a plain heatmap is drawn with seaborn's
            default colormap and tick labels.
        title: Text placed above the plot.

    Returns:
        None. Drawing goes through seaborn and pyplot.
    """
    # Options shared by both variants: fixed colour range.
    heatmap_options = {'vmin': 0., 'vmax': 1.}
    if classes is not None:
        # Labelled variant: class names on both axes plus large cell annotations.
        heatmap_options.update(
            cmap="YlGnBu",
            xticklabels=classes,
            yticklabels=classes,
            annot=True,
            annot_kws={'size': 50},
        )
    sns.heatmap(cm, **heatmap_options)

    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Confusion matrix of the classifier on the training split
# (lr_pred holds the predictions for X_train_new).
cm = confusion_matrix(y_train_new, lr_pred)

# Divide every row by its own total so each row sums to 1;
# keepdims keeps the row sums as a column vector for broadcasting.
cm_norm = cm / cm.sum(axis=1, keepdims=True)

plot_confusion_matrix(cm_norm, classes=lr.classes_, title='Confusion matrix')

In [None]:
# Class balance of the binary 'Age' label (row count per class).
new_df['Age'].value_counts()

In [None]:
# Raw-count confusion matrix on the held-out test split.
print('Confusion Matrix: \n', confusion_matrix(y_test_new, lr.predict(X_test_new)))