# Example Notebook on Codesphere

In [None]:
pip install pandas matplotlib scikit-learn

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

In [3]:
# Load the Iris dataset and wrap its measurements in a DataFrame,
# one column per feature, named after the dataset's feature names.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [None]:
# Summary statistics of the feature columns; as the last expression of the
# cell, the resulting table is shown as the cell output.
df.describe()

## Plot the data

In [None]:
# One histogram per feature on a 2x2 grid, with the mean and median marked.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# zip() pairs each axes with one (column name, column values) item and stops at
# the shorter sequence, so a frame with fewer columns than axes cannot raise
# an IndexError the way positional indexing would.
for ax, (column, values) in zip(axes.ravel(), df.items()):
    ax.hist(values, bins=15)
    ax.set_xlabel(column)
    ax.set_ylabel('Frequency')
    # Label the reference lines so the legend says which one is which.
    ax.axvline(values.mean(), color='r', linestyle='dashed', linewidth=1, label='Mean')
    ax.axvline(values.median(), color='g', linestyle='dashed', linewidth=1, label='Median')
    ax.legend()

# Keep axis labels of neighbouring subplots from overlapping.
fig.tight_layout()
plt.show()

## Train a small machine learning model

In [None]:
# Split the data into a training set and a test set (80% / 20%)
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split, and therefore the accuracy and plots
# that follow, identical on every run. stratify keeps the class proportions
# the same in the training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    iris.target,
    test_size=0.2,
    random_state=42,
    stratify=iris.target,
)

# Use KNN as the classifier and fit it with the training data
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Let the fitted classifier label the held-out samples, then report the
# accuracy score of those labels against the true ones.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', accuracy)

## Plot the predictions

In [None]:
# Compare true and predicted classes on the first two feature columns.
fig, ax = plt.subplots()

# Both layers share one colour scale spanning every class label. Without
# explicit limits each scatter call autoscales to the labels it was given, so
# the same class could be drawn in different colours in the two layers.
class_colours = dict(cmap='viridis', vmin=iris.target.min(), vmax=iris.target.max())
x_values, y_values = X_test.iloc[:, 0], X_test.iloc[:, 1]

# plot ground-truth values
ax.scatter(x_values, y_values, c=y_test, label='Ground Truth', **class_colours)

# plot predicted values on top; a cross whose colour differs from the dot
# beneath it marks a misclassified sample
ax.scatter(x_values, y_values, c=y_pred, marker='x', label='Predicted', **class_colours)

# add plot labels and legend; axis labels are taken from the plotted columns
ax.set_xlabel(X_test.columns[0])
ax.set_ylabel(X_test.columns[1])
ax.set_title('Ground Truth vs Predicted')
ax.legend()

# display plot
plt.show()