<a href="https://colab.research.google.com/github/giocarro/Data_Science_Gio/blob/main/Tareas/Iris_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [None]:
# Switch the working directory to /content so the relative 'iris.csv' path used
# when loading the data resolves there.
# NOTE(review): presumably the default Colab workspace; the CSV must be uploaded to it first.
%cd /content/

/content


In [None]:
# Load the Iris measurements from a CSV expected in the current working directory.
df = pd.read_csv('iris.csv')
# Bare last expression: renders the frame as the cell output.
df

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [None]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Sepal.Length  150 non-null    float64
 1   Sepal.Width   150 non-null    float64
 2   Petal.Length  150 non-null    float64
 3   Petal.Width   150 non-null    float64
 4   Species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [None]:
# Number of rows per species, used to check how balanced the classes are.
class_counts = df['Species'].value_counts()
class_counts

setosa        50
versicolor    50
virginica     50
Name: Species, dtype: int64

In [None]:
# Bar chart of the per-species row counts, with each count printed on its bar.
fig = px.bar(
    x=class_counts.index,
    y=class_counts.values,
    text=class_counts.values,
    labels={'x': 'Class', 'y': 'Count'},
    title='Species Class Distribution',
    template='plotly_white',
)
fig.show()

In [None]:
# One sub-frame per species, selected with a boolean mask on the 'Species' column.
# NOTE(review): class_1..class_3 are not referenced again in the visible part of
# the notebook; confirm they are still needed.
class_1, class_2, class_3 = (
    df[df['Species'] == species]
    for species in ('setosa', 'versicolor', 'virginica')
)


In [None]:
# Every column except the last one is treated as a feature; the last column
# ('Species') is the prediction target.
features = df.columns[:-1]
features

Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'], dtype='object')

In [None]:
# One overlaid histogram per feature, coloured by species, to show how well each
# measurement separates the classes.
print("\nFeature Distributions by Class:")
for feature in features:
    # barmode='overlay' is passed to px.histogram directly, so no separate
    # update_layout(barmode=...) call is needed.
    fig = px.histogram(df, x=feature, color='Species', barmode='overlay', title=f"{feature} Distribution by Class")
    # Raise the trace opacity so overlapping bars stay readable.
    fig.update_traces(opacity=0.75)
    fig.show()


Feature Distributions by Class:


In [None]:
# NOTE(review): stray scratch arithmetic; nothing in the visible notebook uses
# this value, so the cell looks safe to delete.
1000*642

642000

In [None]:
# Pairwise correlation between the numeric measurements. The frame also holds the
# string 'Species' column, and DataFrame.corr() raises on non-numeric columns in
# pandas >= 2.0 (earlier versions dropped them silently), so restrict the frame
# to numeric dtypes before computing the correlations.
corr_matrix = df.select_dtypes(include='number').corr()

# Diverging colour scale pinned to [-1, 1] so zero correlation sits at the midpoint.
# The colour bar is hidden because text_auto already prints each cell's value.
fig = px.imshow(corr_matrix, color_continuous_scale = 'RdBu', range_color=[-1,1], text_auto = True, aspect = "auto", labels=dict(color="Correlation"), title='Feature Correlation', height = 800)
fig.update(layout_coloraxis_showscale=False)
fig.show()





In [None]:
# Hold out 20% of the rows for evaluation. All columns but the last are the
# features and 'Species' is the target; the fixed seed keeps the split repeatable.
# NOTE(review): no `stratify` argument is passed, so the species proportions in
# the test split are not guaranteed to match the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df['Species'],
    test_size=0.2,
    random_state=7,
)

In [None]:
# Training features (the 80% of rows not held out), shown for inspection.
X_train

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
126,6.2,2.8,4.8,1.8
79,5.7,2.6,3.5,1.0
22,4.6,3.6,1.0,0.2
139,6.9,3.1,5.4,2.1
74,6.4,2.9,4.3,1.3
...,...,...,...,...
142,5.8,2.7,5.1,1.9
92,5.8,2.6,4.0,1.2
103,6.3,2.9,5.6,1.8
67,5.8,2.7,4.1,1.0


In [None]:
# Held-out test features, shown for inspection.
X_test

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
149,5.9,3.0,5.1,1.8
84,5.4,3.0,4.5,1.5
40,5.0,3.5,1.3,0.3
66,5.6,3.0,4.5,1.5
106,4.9,2.5,4.5,1.7
41,4.5,2.3,1.3,0.3
52,6.9,3.1,4.9,1.5
94,5.6,2.7,4.2,1.3
11,4.8,3.4,1.6,0.2
51,6.4,3.2,4.5,1.5


In [None]:
# Training labels; they share their row index with X_train.
y_train

126     virginica
79     versicolor
22         setosa
139     virginica
74     versicolor
          ...    
142     virginica
92     versicolor
103     virginica
67     versicolor
25         setosa
Name: Species, Length: 120, dtype: object

In [None]:
# Held-out test labels; they share their row index with X_test.
y_test

149     virginica
84     versicolor
40         setosa
66     versicolor
106     virginica
41         setosa
52     versicolor
94     versicolor
11         setosa
51     versicolor
77     versicolor
85     versicolor
32         setosa
109     virginica
28         setosa
70     versicolor
108     virginica
137     virginica
46         setosa
37         setosa
82     versicolor
120     virginica
63     versicolor
119     virginica
129     virginica
138     virginica
97     versicolor
80     versicolor
101     virginica
140     virginica
Name: Species, dtype: object

In [None]:
# Standardise the features. The scaler's statistics are learned from the training
# split only and then applied unchanged to both splits, so nothing about the
# test rows leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Preview only the first rows of the standardised training matrix; printing the
# whole array floods the output without adding information.
X_train_scaled[:5]

array([[ 0.43325894, -0.59653573,  0.62601116,  0.82090477],
       [-0.15754871, -1.05688485, -0.09737951, -0.21602757],
       [-1.45732553,  1.24486074, -1.48851543, -1.25295991],
       [ 1.26038965,  0.09398794,  0.95988378,  1.20975439],
       [ 0.669582  , -0.36636117,  0.34778398,  0.17282206],
       [-1.22100247, -0.13618661, -1.26593368, -1.12334337],
       [-0.39387177,  1.01468618, -1.32157912, -1.25295991],
       [-0.51203329,  1.93538441, -1.09899737, -0.99372682],
       [-0.86651788,  1.01468618, -1.26593368, -1.12334337],
       [ 1.49671271, -0.13618661,  1.23811096,  1.20975439],
       [ 1.02406659,  0.55433706,  1.12682009,  1.20975439],
       [ 1.14222812, -0.59653573,  0.62601116,  0.3024386 ],
       [ 0.669582  , -0.59653573,  1.07117465,  1.33937094],
       [ 0.78774353, -0.13618661,  1.01552922,  0.82090477],
       [-0.15754871, -0.13618661,  0.29213854,  0.04320551],
       [-0.98467941,  0.55433706, -1.26593368, -1.25295991],
       [ 1.02406659,  0.

In [None]:
# Preview only the first rows of the standardised test matrix; printing the
# whole array floods the output without adding information.
X_test_scaled[:5]

array([[ 0.07877435, -0.13618661,  0.79294747,  0.82090477],
       [-0.51203329, -0.13618661,  0.45907485,  0.43205514],
       [-0.98467941,  1.01468618, -1.32157912, -1.12334337],
       [-0.27571024, -0.13618661,  0.45907485,  0.43205514],
       [-1.10284094, -1.28705941,  0.45907485,  0.69128823],
       [-1.57548706, -1.74740852, -1.32157912, -1.12334337],
       [ 1.26038965,  0.09398794,  0.6816566 ,  0.43205514],
       [-0.27571024, -0.82671029,  0.29213854,  0.17282206],
       [-1.22100247,  0.78451162, -1.15464281, -1.25295991],
       [ 0.669582  ,  0.3241625 ,  0.45907485,  0.43205514],
       [ 1.02406659, -0.13618661,  0.73730203,  0.69128823],
       [ 0.19693588,  0.78451162,  0.45907485,  0.56167168],
       [-0.74835635,  2.39573353, -1.21028825, -1.38257645],
       [ 1.61487424,  1.24486074,  1.34940184,  1.72822056],
       [-0.74835635,  0.78451162, -1.26593368, -1.25295991],
       [ 0.07877435,  0.3241625 ,  0.62601116,  0.82090477],
       [ 1.02406659, -1.

In [None]:
def plot_confusion_matrix(cm, labels, title="Confusion Matrix"):
    """Display a confusion matrix as an annotated Plotly heatmap.

    Args:
        cm: 2-D matrix of counts. Rows are drawn on the "Actual" axis and
            columns on the "Predicted" axis.
        labels: Class names used as tick labels on both axes. Their order must
            match the row/column order of ``cm``. Any sequence is accepted
            (for example a list or an estimator's ``classes_`` array).
        title: Figure title. Defaults to "Confusion Matrix", so existing
            two-argument calls render exactly as before.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Normalise to a plain list so array-like label containers behave the same
    # as the list literals callers already pass.
    tick_labels = list(labels)
    fig_cm = px.imshow(
        cm,
        labels=dict(x="Predicted", y="Actual", color="Count"),
        x=tick_labels,
        y=tick_labels,
        color_continuous_scale='Viridis',
        text_auto=True,
        title=title,
    )
    # Every cell already prints its count, so the colour bar is redundant.
    fig_cm.update_layout(coloraxis_showscale=False)
    fig_cm.show()

# Regresión Logística

In [None]:
# Logistic-regression baseline with the estimator's default hyper-parameters,
# trained on the standardised training features.
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

In [None]:
# Predicted species for the held-out (standardised) test rows.
lr_pred = lr.predict(X_test_scaled)

In [None]:
# Share of test rows whose predicted species equals the true species.
lr_accuracy = accuracy_score(y_test, lr_pred)
print(f"Logistic Regression Accuracy: {lr_accuracy:.2f}")

Logistic Regression Accuracy: 0.87


In [None]:
# Macro average: precision is computed per species and then averaged with equal
# weight, regardless of how many test rows each species has.
lr_precision = precision_score(y_test, lr_pred, average='macro')
print(f"Logistic Regression Precision: {lr_precision:.2f}")

Logistic Regression Precission: 0.88


In [None]:
# Macro average: recall is computed per species and then averaged with equal weight.
lr_recall = recall_score(y_test, lr_pred, average='macro')
print(f"Logistic Regression Recall: {lr_recall:.2f}")

Logistic Regression Recall: 0.88


In [None]:
# Macro average: F1 is computed per species and then averaged with equal weight.
lr_f1 = f1_score(y_test, lr_pred, average='macro')
print(f"Logistic Regression F1 Score: {lr_f1:.2f}")

Logistic Regression F1 Score: 0.88


In [None]:
# Per-species precision/recall/F1 table; printed because the report is a
# pre-formatted plain-text string.
lr_report = classification_report(y_test, lr_pred)
print("Logistic Regression Classification Report:")
print(lr_report)

Logistic Regression Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         7
  versicolor       0.83      0.83      0.83        12
   virginica       0.82      0.82      0.82        11

    accuracy                           0.87        30
   macro avg       0.88      0.88      0.88        30
weighted avg       0.87      0.87      0.87        30



In [None]:
# Take the class order from the fitted model and use it both to lay out the
# matrix and to label the axes, so counts and tick labels cannot drift apart
# (a hand-written label list only matches while it happens to mirror the sorted
# class names and every class appears in the test split).
lr_labels = list(lr.classes_)
lr_cm = confusion_matrix(y_test, lr_pred, labels=lr_labels)
plot_confusion_matrix(lr_cm, lr_labels)

In [None]:
# k-nearest-neighbours classifier with the estimator's default settings, trained
# on the same standardised training features as the logistic regression.
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, y_train)

In [None]:
# Predicted species for the held-out (standardised) test rows.
knn_pred = knn.predict(X_test_scaled)

In [None]:
# Share of test rows whose predicted species equals the true species.
knn_accuracy = accuracy_score(y_test, knn_pred)
print(f"KNN Accuracy: {knn_accuracy:.2f}")

KNN Accuracy: 0.87


In [None]:
# Macro-averaged, like the logistic-regression metrics, so the two models are
# compared on the same footing. Micro averaging on a single-label multiclass
# problem simply reproduces accuracy and adds no information.
knn_precision = precision_score(y_test, knn_pred, average='macro')
print(f"KNN Precision: {knn_precision:.2f}")

KNN Precission: 0.87


In [None]:
# Macro-averaged, like the logistic-regression metrics, so the two models are
# compared on the same footing. Micro averaging on a single-label multiclass
# problem simply reproduces accuracy and adds no information.
knn_recall = recall_score(y_test, knn_pred, average='macro')
print(f"KNN Recall: {knn_recall:.2f}")

KNN Recall: 0.87


In [None]:
# Macro-averaged, like the logistic-regression metrics, so the two models are
# compared on the same footing. Micro averaging on a single-label multiclass
# problem simply reproduces accuracy and adds no information.
knn_f1 = f1_score(y_test, knn_pred, average='macro')
print(f"KNN F1 Score: {knn_f1:.2f}")

KNN F1 Score: 0.87


In [None]:
# Per-species precision/recall/F1 table; printed because the report is a
# pre-formatted plain-text string.
knn_report = classification_report(y_test, knn_pred)
print("KNN Classification Report:")
print(knn_report)

KNN Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      0.86      0.92         7
  versicolor       0.75      1.00      0.86        12
   virginica       1.00      0.73      0.84        11

    accuracy                           0.87        30
   macro avg       0.92      0.86      0.87        30
weighted avg       0.90      0.87      0.87        30



In [None]:
# Take the class order from the fitted model and use it both to lay out the
# matrix and to label the axes, so counts and tick labels cannot drift apart
# (a hand-written label list only matches while it happens to mirror the sorted
# class names and every class appears in the test split).
knn_labels = list(knn.classes_)
knn_cm = confusion_matrix(y_test, knn_pred, labels=knn_labels)
plot_confusion_matrix(knn_cm, knn_labels)