# Classification


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score

# pip install pandas matplotlib scikit-learn

In [None]:
path = "/home/a-schulz/Projects/applied_artificial_intelligence/src/week_2/datasets/winequality-red.csv"

# Read file
file = pd.read_csv(path)
# Get information about the file
file.info()

# Get Columns for the dataset
colNames = list(file.columns)
colNamesX= colNames.copy()
colNamesX.remove("quality")

# Prepare Dataset
lenDataset = file.shape[0]
shuffledIndices = np.random.permutation(lenDataset)

# Split target from dataset
X = file[colNamesX].values
Y = file["quality"].values

# Get unique values of the target variable
print("Available values for target:", list(set(Y)))

In [None]:
# Split into test (20%) and Test (80%)
_thresh = int(np.floor(lenDataset * 0.8))

TrainX, TrainY = X[shuffledIndices[:_thresh]], Y[shuffledIndices[:_thresh]]
TestX, TestY = X[shuffledIndices[_thresh:]], Y[shuffledIndices[_thresh:]]

In [None]:
# Create classifier
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(12, ), random_state=1)

# Train classifier and get predictions
clf.fit(TrainX, TrainY)
preds = clf.predict(TestX)

#Count occurrences of each unique element
unique_elements, counts = [], []
for element in set(preds):
    unique_elements.append(element)
    counts.append(list(preds).count(element))

# Step 2: Plot the pie chart
plt.figure(figsize=(6, 6))
plt.pie(counts, labels=unique_elements, autopct='%1.1f%%')
plt.title('Count of Same Elements in the Array')
plt.show()

cm = confusion_matrix(TestY, preds)
print("Confusion matrix\n", cm)

acc = accuracy_score(TestY, preds)
print("Accuracy score\n", acc)

accb = balanced_accuracy_score(TestY, preds)
print("Balanced accuracy score\n", accb)


# Regression


In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error as MSE

In [66]:
path = "/home/a-schulz/Projects/applied_artificial_intelligence/src/week_3/Datasets/Regression1.csv"
file = pd.read_csv(path)
file.info()

# Columns
columns = list(file.columns)
columnsX = columns.copy()
columnsX.remove("Y")

print("Current target values:", set(list(file["Y"])))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 370 entries, 0 to 369
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X1      370 non-null    float64
 1   X2      370 non-null    float64
 2   X3      370 non-null    float64
 3   X4      370 non-null    float64
 4   X5      370 non-null    float64
 5   X6      370 non-null    float64
 6   X7      370 non-null    float64
 7   Y       370 non-null    float64
dtypes: float64(8)
memory usage: 23.3 KB
Current target values: {0.5826091261015867, 1.3764114466674071, 2.369475377096161, 1.1391840058341671, 1.8042090682658696, 2.311648789980758, 2.873376267534418, 0.7260341815805482, 0.6875420538672401, -0.6627150597684937, 1.480040633401729, 2.370354449852586, -1.3036076340875196, 0.6283380921908734, -1.0581374266214985, -1.5221395070123824, -0.2204111575775549, -0.1802418797243854, -0.3079730797150679, -0.3995823843329988, 0.3527578500042728, -0.0284098294942518, -0.75341754866525

In [56]:
shuffledIndices = np.random.permutation(file.shape[0])
_thresh = int(np.floor(file.shape[0] * 0.8))

X = file[columnsX].values
Y = file["Y"].values

TrainX, TrainY = X[shuffledIndices[:_thresh]], Y[shuffledIndices[:_thresh]]
TestX, TestY = X[shuffledIndices[_thresh:]], Y[shuffledIndices[_thresh:]]

In [58]:
reg = MLPRegressor(solver='adam', alpha=1e-5, hidden_layer_sizes=(12, ), random_state=1)
reg.fit(TrainX, TrainY)

preds = reg.predict(TestX)

mse = MSE(TestY, preds)
print("Mean squared error\n", mse)

Mean squared error
 9.11317939588992


