# Del 06: Strojno učenje: Logistična regresija

## Logistic regression

### Classification

### Introduction to the data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the admissions table and plot the admission outcome against GPA.
admissions = pd.read_csv("data/admissions.csv")
plt.scatter(x=admissions["gpa"], y=admissions["admit"])
plt.show()

In [None]:
# Same kind of plot, this time with the GRE score on the horizontal axis.
plt.scatter(x=admissions["gre"], y=admissions["admit"])
plt.show()

### Logistic regression

### Logistic function

In [None]:
import numpy as np

# Logistic Function
def logistic(x):
    """Evaluate the logistic (sigmoid) function e^x / (1 + e^x).

    Parameters
    ----------
    x : scalar or array-like of real numbers
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar for
        scalar input).

    Notes
    -----
    The textbook form ``np.exp(x) / (1 + np.exp(x))`` overflows for large
    positive ``x`` and yields ``inf / inf = nan``. Here only
    ``exp(-|x|)`` is evaluated, which always lies in (0, 1], and the
    algebraically equivalent expression is chosen according to the sign of x.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a NumPy scalar
    # and leaves arrays of higher dimension unchanged.
    return result[()]
    
# Build 50 evenly spaced real inputs covering the interval from -6 to 6.
x = np.linspace(start=-6, stop=6, num=50, dtype=float)

# Map every value in x through the logistic function.
y = logistic(x)

# Draw the transformed values against their inputs.
plt.plot(x, y)
plt.ylabel("Probability")
plt.show()

### Training a logistic regression model

### Plotting probabilities

In [None]:
# Imported in this cell because it is the first one that uses
# LogisticRegression; without the import a fresh kernel raises NameError here.
from sklearn.linear_model import LogisticRegression

# Fit a one-feature model: admission outcome as a function of GPA.
logistic_model = LogisticRegression()
logistic_model.fit(admissions[["gpa"]], admissions["admit"])

# predict_proba returns one column per class; column 1 is plotted as the
# probability of admission (assumes admit is coded 0/1 — TODO confirm).
pred_probs = logistic_model.predict_proba(admissions[["gpa"]])

plt.scatter(admissions["gpa"], pred_probs[:, 1])
plt.show()

### Predict labels

In [None]:
# Imported in this cell so it does not depend on an import that only appears
# further down the notebook; a repeated import is harmless.
from sklearn.linear_model import LogisticRegression

# Fit the one-feature GPA model.
logistic_model = LogisticRegression()
logistic_model.fit(admissions[["gpa"]], admissions["admit"])

# predict returns the hard class label for every row instead of a probability.
fitted_labels = logistic_model.predict(admissions[["gpa"]])
plt.scatter(admissions["gpa"], fitted_labels)
plt.show()

## Introduction to evaluating binary classifiers

### Introduction to the Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [None]:
# Reload the data and fit the GPA-only model used in the evaluation section.
admissions = pd.read_csv("data/admissions.csv")
model = LogisticRegression()
model.fit(admissions[["gpa"]], admissions["admit"])

# The evaluation cells below filter on these two columns, which were never
# created and therefore raised KeyError. predicted_label holds the model's
# label for each row; actual_label is a copy of the observed admit column.
admissions["predicted_label"] = model.predict(admissions[["gpa"]])
admissions["actual_label"] = admissions["admit"]

### Accuracy

### Binary classification outcomes

In [None]:
# True positives: rows predicted as 1 whose actual label is also 1.
true_positive_filter = admissions["predicted_label"].eq(1) & admissions["actual_label"].eq(1)
true_positives = len(admissions.loc[true_positive_filter])

In [None]:
# Show the number of rows counted as true positives.
print(true_positives)

In [None]:
# True negatives: rows predicted as 0 whose actual label is also 0.
true_negative_filter = admissions["predicted_label"].eq(0) & admissions["actual_label"].eq(0)
true_negatives = len(admissions.loc[true_negative_filter])

In [None]:
# Show the number of rows counted as true negatives.
print(true_negatives)

### Sensitivity

In [None]:
# From the previous screen
# True positives: predicted 1 and actually 1 (recomputed from the earlier section).
true_positive_filter = admissions["predicted_label"].eq(1) & admissions["actual_label"].eq(1)
true_positives = len(admissions.loc[true_positive_filter])

# False negatives: predicted 0 although the actual label is 1.
false_negative_filter = admissions["predicted_label"].eq(0) & admissions["actual_label"].eq(1)
false_negatives = len(admissions.loc[false_negative_filter])

In [None]:
# Sensitivity: true positives as a share of all rows whose actual label is 1
# (true positives plus false negatives).
sensitivity = true_positives / (false_negatives + true_positives)

print(sensitivity)

### Specificity

In [None]:
# True negatives: predicted 0 and actually 0.
true_negative_filter = admissions["predicted_label"].eq(0) & admissions["actual_label"].eq(0)
true_negatives = len(admissions.loc[true_negative_filter])

# False positives: predicted 1 although the actual label is 0.
false_positive_filter = admissions["predicted_label"].eq(1) & admissions["actual_label"].eq(0)
false_positives = len(admissions.loc[false_positive_filter])

In [None]:
# Specificity: true negatives as a share of all rows whose actual label is 0
# (true negatives plus false positives).
specificity = true_negatives / (true_negatives + false_positives)
print(specificity)

### Vaja - uporaba vseh značilk

In [None]:
# prikaz podatkov
plt.scatter(admitted.loc[:, "gpa"], admitted.loc[:, "gre"], s=10, label='Admitted')
plt.scatter(not_admitted.loc[:, "gpa"], not_admitted.loc[:, "gre"], s=10, label='Not Admitted')
plt.legend()
plt.show()

In [None]:
# The model fitted earlier uses gpa only, so it has a single coefficient and
# unpacking two weights from it raises ValueError. Fit the exercise model on
# both features; the column order fixes w1 = gpa weight, w2 = gre weight.
model = LogisticRegression()
model.fit(admissions[["gpa", "gre"]], admissions["admit"])

# Decision boundary: b + w1*gpa + w2*gre = 0  =>  gre = m*gpa + c.
b = model.intercept_[0]
w1, w2 = model.coef_[0]
c = -b / w2  # intercept of the boundary on the gre axis
m = -w1 / w2  # slope of the boundary in the (gpa, gre) plane

In [None]:
# Axis limits come from the whole data set; limits taken only from the
# admitted rows would clip not-admitted points lying outside that range.
xmin, xmax = admissions['gpa'].min(), admissions['gpa'].max()
ymin, ymax = admissions['gre'].min(), admissions['gre'].max()

# Evaluate the boundary line at the two horizontal extremes
# (assumes m and c describe gre as a linear function of gpa).
xd = np.array([xmin, xmax])
yd = m*xd + c
plt.plot(xd, yd, 'k', lw=1, ls='--')
# Shade the area between the line and the bottom / top of the plot.
plt.fill_between(xd, yd, ymin, color='tab:orange', alpha=0.2)
plt.fill_between(xd, yd, ymax, color='tab:blue', alpha=0.2)

plt.scatter(admitted.loc[:, "gpa"], admitted.loc[:, "gre"], s=10, label='Admitted')
plt.scatter(not_admitted.loc[:, "gpa"], not_admitted.loc[:, "gre"], s=10, label='Not Admitted')
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.ylabel(r'gre')
plt.xlabel(r'gpa')
plt.legend()
plt.show()

## Multiclass classification

### Introduction to the data

In [None]:
import pandas as pd
# Load the cars table used in the multiclass classification section.
cars = pd.read_csv("data/auto.csv")

In [None]:
# Preview the first rows of the cars table.
cars.head()

### Dummy variables

### Multiclass classification

### Training a multiclass logistic regression model

In [None]:
from sklearn.linear_model import LogisticRegression

# Collect the distinct values found in the origin column.
unique_origins = cars.loc[:, "origin"].unique()
# Order them in place.
unique_origins.sort()
# Bare last expression so the notebook displays the sorted values.
unique_origins

### Testing the models

### Choose the origin