In [1]:
import pandas as pd

# İlk CSV dosyasını oku
df1 = pd.read_csv('2020-2021.csv')

# İkinci CSV dosyasını oku
df2 = pd.read_csv('2021-2022.csv')

# İki veri çerçevesini birleştir
df = pd.concat([df1, df2], ignore_index=True)

In [2]:
# Birleştirilmiş veri çerçevesini yazdır veya istediğiniz işlemleri uygulayabilirsiniz
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 760 entries, 0 to 759
Columns: 106 entries, Div to AvgCAHA
dtypes: float64(82), int64(16), object(8)
memory usage: 629.5+ KB


In [3]:
object_columns = df.select_dtypes(include=['object']).columns
print(object_columns)

Index(['Div', 'Date', 'Time', 'HomeTeam', 'AwayTeam', 'FTR', 'HTR', 'Referee'], dtype='object')


In [4]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
for column in ['Div', 'Date', 'Time', 'HomeTeam', 'AwayTeam', 'FTR', 'HTR', 'Referee']:
    df[column] = label_encoder.fit_transform(df[column])

In [5]:
hedef_degisken = "FTR"
Y = df[hedef_degisken]

# Bağımsız değişkenler (X): "FTR" sütununu hariç tüm sütunlar
X = df.drop(columns=[hedef_degisken])

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import cross_val_score


In [7]:
# Veri setini eğitim ve test setlerine ayır
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Decision Tree modelini oluştur
model = DecisionTreeClassifier()

# Modeli eğit
model.fit(X_train, y_train)

# Tahmin yap
y_pred = model.predict(X_test)

In [8]:
# Modelin tahminleri
y_pred = model.predict(X_test)

# Confusion Matrix'i elde et
conf_matrix = confusion_matrix(y_test, y_pred)

# True positive, false positive, true negative, false negative değerlerini al
TP = conf_matrix[1, 1]
FP = conf_matrix[0, 1]
TN = conf_matrix[0, 0]
FN = conf_matrix[1, 0]

#Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Sensitivity (Recall)
sensitivity = TP / (TP + FN)
print(f"Sensitivity (Recall): {sensitivity}")

# Specificity
specificity = TN / (TN + FP)
print(f"Specificity: {specificity}")

# Precision
precision = TP / (TP + FP)
print(f"Precision: {precision}")

# F1 Score
f1_score = 2 * (precision * sensitivity) / (precision + sensitivity)
print(f"F1 Score: {f1_score}")

#AUC (Area under the ROC Curve)
#fpr, tpr, thresholds = roc_curve(y_test, model.predict_proba(X_test)[:, 1])
#roc_auc = auc(fpr, tpr)
#print(f"AUC (Area under the ROC Curve): {roc_auc}")

Accuracy: 0.993421052631579
Sensitivity (Recall): 1.0
Specificity: 1.0
Precision: 1.0
F1 Score: 1.0


In [9]:
# df: Veri çerçeveniz
# hedef_degisken ve X'yi belirledikten sonra
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
# Çapraz doğrulama ile model performansını değerlendir
cv_scores = cross_val_score(model, X_train, y_train, cv=10, scoring='accuracy')

# Elde edilen çapraz doğrulama skorlarını yazdır
print("Çapraz Doğrulama Skorları:", cv_scores)

# Ortalama çapraz doğrulama skorunu hesapla
mean_cv_score = cv_scores.mean()
print("Ortalama Çapraz Doğrulama Skoru:", mean_cv_score)

Çapraz Doğrulama Skorları: [0.98360656 1.         1.         1.         0.98360656 1.
 1.         0.98360656 0.98333333 1.        ]
Ortalama Çapraz Doğrulama Skoru: 0.9934153005464481


In [13]:
X_train.isna().sum()

Div         0
Date        0
Time        0
HomeTeam    0
AwayTeam    0
           ..
PCAHA       0
MaxCAHH     0
MaxCAHA     0
AvgCAHH     0
AvgCAHA     0
Length: 105, dtype: int64

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


# Etiket ve özellikleri ayır
hedef_degisken = "FTR"  
Y = df[hedef_degisken]
X = df.drop(columns=[hedef_degisken])

# Veri setini eğitim ve test setlerine böl
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Eksik değerleri doldurmak için SimpleImputer kullan
model = make_pipeline(SimpleImputer(strategy='mean'), HistGradientBoostingClassifier())

# Modeli eğit
model.fit(X_train, y_train)

# Test seti üzerinde tahmin yap
y_pred = model.predict(X_test)

# Performans metriklerini değerlendir
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='micro')  # veya 'macro', 'weighted'
recall = recall_score(y_test, y_pred, average='micro')
f1 = f1_score(y_test, y_pred, average='micro')
#roc_auc = roc_auc_score(y_test, model.decision_function(X_test), multi_class='ovr')

# Sonuçları yazdır
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall (Sensitivity): {recall}")
print(f"F1 Score: {f1}")
#print(f"AUC (Area under the ROC Curve): {roc_auc}")


Accuracy: 1.0
Precision: 1.0
Recall (Sensitivity): 1.0
F1 Score: 1.0


In [12]:
# Confusion Matrix'i elde et
conf_matrix = confusion_matrix(y_test, y_pred)

# True positive, false positive, true negative, false negative değerlerini al
TP = conf_matrix[1, 1]
FP = conf_matrix[0, 1]
TN = conf_matrix[0, 0]
FN = conf_matrix[1, 0]

#Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Sensitivity (Recall)
sensitivity = TP / (TP + FN)
print(f"Sensitivity (Recall): {sensitivity}")

# Specificity
specificity = TN / (TN + FP)
print(f"Specificity: {specificity}")

# Precision
precision = TP / (TP + FP)
print(f"Precision: {precision}")

# F1 Score
f1_score = 2 * (precision * sensitivity) / (precision + sensitivity)
print(f"F1 Score: {f1_score}")

#AUC (Area under the ROC Curve)
#fpr, tpr, thresholds = roc_curve(y_test, model.predict_proba(X_test)[:, 1])
#roc_auc = auc(fpr, tpr)
#print(f"AUC (Area under the ROC Curve): {roc_auc}");

Accuracy: 0.993421052631579
Sensitivity (Recall): 1.0
Specificity: 1.0
Precision: 1.0
F1 Score: 1.0
