In [1]:
import pandas as pd
import glob
import os

# Tüm CSV'leri Birleştirme

In [2]:
# Folder that is searched for the per-season CSV files (relative path).
DATASET_DIR: str = "./Dataset"
# File name of the merged CSV that this notebook writes and later reloads.
OUTFILE: str = "fenerbahce_beko.csv"

In [3]:
# Collect every per-season file in DATASET_DIR, in sorted (lexicographic) order,
# then report how many were found and list their base names.
season_glob = os.path.join(DATASET_DIR, "fenerbahce_*.csv")
csv_files = glob.glob(season_glob)
csv_files.sort()
print(f"Bulunan dosya sayısı: {len(csv_files)}")
for file_name in map(os.path.basename, csv_files):
    print(" -", file_name)

Bulunan dosya sayısı: 24
 - fenerbahce_2001-02.csv
 - fenerbahce_2002-03.csv
 - fenerbahce_2003-04.csv
 - fenerbahce_2004-05.csv
 - fenerbahce_2005-06.csv
 - fenerbahce_2006-07.csv
 - fenerbahce_2007-08.csv
 - fenerbahce_2008-09.csv
 - fenerbahce_2009-10.csv
 - fenerbahce_2010-11.csv
 - fenerbahce_2011-12.csv
 - fenerbahce_2012-13.csv
 - fenerbahce_2013-14.csv
 - fenerbahce_2014-15.csv
 - fenerbahce_2015-16.csv
 - fenerbahce_2016-17.csv
 - fenerbahce_2017-18.csv
 - fenerbahce_2018-19.csv
 - fenerbahce_2019-20.csv
 - fenerbahce_2020-21.csv
 - fenerbahce_2021-22.csv
 - fenerbahce_2022-23.csv
 - fenerbahce_2023-24.csv
 - fenerbahce_2024-25.csv


In [4]:
# Load every season file and stack them into one frame with a fresh 0..n-1 index.
# pd.concat fails on an empty list with a terse "No objects to concatenate", so
# stop early with a message that points at the real cause (no files matched).
if not csv_files:
    raise ValueError(
        "No season CSV files were found; check DATASET_DIR and the working directory."
    )

dfs = [pd.read_csv(path) for path in csv_files]
df = pd.concat(dfs, ignore_index=True)

In [5]:
# Report the merged frame's shape and (at most) its first 30 column labels,
# then write the merged data to OUTFILE without the index column.
frame_shape = df.shape
column_preview = df.columns[:30].tolist()
print("Veri boyutu:", frame_shape)
print("Sütun isimleri:", column_preview)
df.to_csv(OUTFILE, index=False)
print(f"\nKaydedildi: {OUTFILE}")

Veri boyutu: (629, 15)
Sütun isimleri: ['Date', 'Opposing Team', 'IsHome', 'Points', 'Performance Index Rating', 'Two-point %', 'Three-point %', 'Free-throw %', 'Offensive rebounds', 'Defensive rebounds', 'Total rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers']

Kaydedildi: fenerbahce_beko.csv


# Kolon Açıklamaları

<mark>Date:</mark> Maçın oynandığı tarih <br>
<mark>Opposing Team:</mark> Fenerbahçe Beko’nun karşılaştığı rakip takım <br>
<mark>IsHome:</mark> Fenerbahçe Beko’nun ev sahibi / deplasman durumu <br>
<mark>Points:</mark> Fenerbahçe Beko’nun maçta attığı toplam sayı <br>
<mark>Performance Index Rating:</mark> Fenerbahçe Beko’nun performans verimlilik puanı (target = y) <br>
<mark>Two-point %:</mark> Fenerbahçe Beko’nun maçtaki iki sayılık atış yüzdesi <br>
<mark>Three-point %:</mark> Fenerbahçe Beko’nun maçtaki üç sayılık atış yüzdesi <br>
<mark>Free-throw %:</mark> Fenerbahçe Beko’nun maçtaki serbest atış yüzdesi <br>
<mark>Offensive rebounds:</mark> Fenerbahçe Beko’nun maçta aldığı hücum ribaundu <br>
<mark>Defensive rebounds:</mark> Fenerbahçe Beko’nun maçta aldığı savunma ribaundu <br>
<mark>Total rebounds:</mark> Fenerbahçe Beko’nun maçta aldığı toplam ribaund <br>
<mark>Assists:</mark> Fenerbahçe Beko’nun maçtaki asist sayısı <br>
<mark>Steals:</mark> Fenerbahçe Beko’nun maçtaki top çalma sayısı <br>
<mark>Blocks:</mark> Fenerbahçe Beko’nun maçta yaptığı blok sayısı <br>
<mark>Turnovers:</mark> Fenerbahçe Beko’nun maçtaki top kaybı sayısı <br>

# Exploratory Data Analysis

In [6]:
# Reload the merged dataset that was written earlier in this notebook.
# Read it through the OUTFILE constant instead of repeating the file name, so the
# write and the read cannot drift apart if the constant is ever changed.
df = pd.read_csv(OUTFILE)
df.head()

Unnamed: 0,Date,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers
0,18 Apr 2002 (Thu),CSKA Moscow,1,96.0,88.0,51.2%,51.9%,80.0%,5.0,13.0,18.0,22.0,11.0,1.0,11.0
1,27 Mar 2002 (Wed),Tau Ceramica,1,72.0,64.0,45.0%,44.4%,70.6%,5.0,16.0,21.0,19.0,6.0,2.0,12.0
2,21 Mar 2002 (Thu),Maccabi Elite,1,78.0,71.0,50.0%,25.0%,78.1%,11.0,18.0,29.0,10.0,7.0,2.0,12.0
3,6 Mar 2002 (Wed),CSKA Moscow,0,80.0,76.0,56.4%,33.3%,65.2%,9.0,24.0,33.0,15.0,4.0,0.0,15.0
4,27 Feb 2002 (Wed),Tau Ceramica,0,69.0,36.0,46.8%,18.8%,61.5%,13.0,15.0,28.0,9.0,6.0,0.0,17.0


In [7]:
# Tarih İşlemleri

In [8]:
# Pull the "day month year" part out of the raw Date text; the displayed values
# look like "18 Apr 2002 (Thu)", so the weekday suffix is left behind.
date_text = df['Date'].astype(str).str.extract(
    r'(\d{1,2}\s+[A-Za-z]+\s+\d{4})', expand=False
)

# Parse to datetime; anything that does not match the format becomes NaT.
parsed_dates = pd.to_datetime(date_text, format='%d %b %Y', errors='coerce')

# Add the cleaned text, the parsed date and its day/month/year parts,
# then order the matches chronologically and renumber the index.
df = (
    df.assign(
        Date_clean=date_text,
        Date_parsed=parsed_dates,
        Day=parsed_dates.dt.day,
        Month=parsed_dates.dt.month,
        Year=parsed_dates.dt.year,
    )
    .sort_values(by='Date_parsed')
    .reset_index(drop=True)
)

In [9]:
# Show the last rows; the frame was just sorted by Date_parsed, so these are the latest dates.
df.tail()

Unnamed: 0,Date,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Date_clean,Date_parsed,Day,Month,Year
624,22 Apr 2025 (Tue),Paris Basketball,1,83.0,94.0,50.0%,36.4%,75.0%,19.0,21.0,40.0,18.0,8.0,0.0,10.0,22 Apr 2025,2025-04-22,22,4,2025
625,24 Apr 2025 (Thu),Paris Basketball,1,89.0,113.0,61.8%,47.8%,82.4%,5.0,26.0,31.0,17.0,5.0,4.0,9.0,24 Apr 2025,2025-04-24,24,4,2025
626,29 Apr 2025 (Tue),Paris Basketball,0,98.0,116.0,55.8%,41.7%,80.0%,10.0,28.0,38.0,21.0,5.0,1.0,11.0,29 Apr 2025,2025-04-29,29,4,2025
627,23 May 2025 (Fri),Panathinaikos AKTOR Athens,1,82.0,93.0,50.0%,41.2%,80.0%,10.0,23.0,33.0,17.0,7.0,4.0,9.0,23 May 2025,2025-05-23,23,5,2025
628,25 May 2025 (Sun),AS Monaco,0,81.0,90.0,42.1%,40.9%,84.6%,14.0,24.0,38.0,8.0,8.0,2.0,9.0,25 May 2025,2025-05-25,25,5,2025


In [10]:
# Replace the raw Date text with the parsed datetime values:
# discard the text columns, then move Date_parsed to the end under the name Date.
df = df.drop(columns=["Date", "Date_clean"])
df["Date"] = df.pop("Date_parsed")

In [11]:
# Preview the first rows now that Date holds the parsed datetime values.
df.head()

Unnamed: 0,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Day,Month,Year,Date
0,Kinder London Towers,1,107.0,143.0,70.8%,36.8%,64.3%,11.0,27.0,38.0,21.0,16.0,1.0,14.0,11,10,2001,2001-10-11
1,Zalgiris,0,82.0,66.0,42.0%,52.6%,52.6%,17.0,20.0,37.0,15.0,7.0,1.0,17.0,18,10,2001,2001-10-18
2,Kinder Virtus Bologna,0,60.0,42.0,44.2%,25.0%,58.3%,11.0,23.0,34.0,5.0,10.0,1.0,16.0,24,10,2001,2001-10-24
3,Union Olimpija,1,75.0,72.0,59.5%,14.3%,65.5%,19.0,17.0,36.0,15.0,8.0,1.0,19.0,1,11,2001,2001-11-01
4,Opel Skyliners,0,70.0,51.0,42.2%,33.3%,66.7%,16.0,18.0,34.0,13.0,1.0,3.0,14.0,8,11,2001,2001-11-08


In [12]:
# List the column labels that remain after the date clean-up.
df.columns

Index(['Opposing Team', 'IsHome', 'Points', 'Performance Index Rating',
       'Two-point %', 'Three-point %', 'Free-throw %', 'Offensive rebounds',
       'Defensive rebounds', 'Total rebounds', 'Assists', 'Steals', 'Blocks',
       'Turnovers', 'Day', 'Month', 'Year', 'Date'],
      dtype='object')

In [13]:
# Summarize dtypes and non-null counts before the percentage columns are converted.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 629 entries, 0 to 628
Data columns (total 18 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Opposing Team             629 non-null    object        
 1   IsHome                    629 non-null    int64         
 2   Points                    622 non-null    float64       
 3   Performance Index Rating  622 non-null    float64       
 4   Two-point %               622 non-null    object        
 5   Three-point %             622 non-null    object        
 6   Free-throw %              622 non-null    object        
 7   Offensive rebounds        622 non-null    float64       
 8   Defensive rebounds        622 non-null    float64       
 9   Total rebounds            622 non-null    float64       
 10  Assists                   622 non-null    float64       
 11  Steals                    622 non-null    float64       
 12  Blocks                

In [14]:
# Convert the percentage columns from text such as "51.2%" to plain floats (51.2).
# The dtype guard makes the cell safe to re-run: once a column is numeric the
# `.str` accessor would raise, so columns that are already converted are skipped.
for col in ["Two-point %", "Three-point %", "Free-throw %"]:
    if not pd.api.types.is_numeric_dtype(df[col]):
        # regex=False: "%" is meant as a literal character, not a pattern.
        df[col] = df[col].str.replace("%", "", regex=False).astype(float)

In [15]:
# Re-check dtypes after converting the percentage columns to float.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 629 entries, 0 to 628
Data columns (total 18 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Opposing Team             629 non-null    object        
 1   IsHome                    629 non-null    int64         
 2   Points                    622 non-null    float64       
 3   Performance Index Rating  622 non-null    float64       
 4   Two-point %               622 non-null    float64       
 5   Three-point %             622 non-null    float64       
 6   Free-throw %              622 non-null    float64       
 7   Offensive rebounds        622 non-null    float64       
 8   Defensive rebounds        622 non-null    float64       
 9   Total rebounds            622 non-null    float64       
 10  Assists                   622 non-null    float64       
 11  Steals                    622 non-null    float64       
 12  Blocks                

In [16]:
# Inspect the latest rows to confirm the percentage columns are now plain numbers.
df.tail()

Unnamed: 0,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Day,Month,Year,Date
624,Paris Basketball,1,83.0,94.0,50.0,36.4,75.0,19.0,21.0,40.0,18.0,8.0,0.0,10.0,22,4,2025,2025-04-22
625,Paris Basketball,1,89.0,113.0,61.8,47.8,82.4,5.0,26.0,31.0,17.0,5.0,4.0,9.0,24,4,2025,2025-04-24
626,Paris Basketball,0,98.0,116.0,55.8,41.7,80.0,10.0,28.0,38.0,21.0,5.0,1.0,11.0,29,4,2025,2025-04-29
627,Panathinaikos AKTOR Athens,1,82.0,93.0,50.0,41.2,80.0,10.0,23.0,33.0,17.0,7.0,4.0,9.0,23,5,2025,2025-05-23
628,AS Monaco,0,81.0,90.0,42.1,40.9,84.6,14.0,24.0,38.0,8.0,8.0,2.0,9.0,25,5,2025,2025-05-25


In [17]:
# Show the rows that have no Points value.
df[df["Points"].isna()]

Unnamed: 0,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Day,Month,Year,Date
436,Panathinaikos OPAP Athens,1,,,,,,,,,,,,,13,3,2020,2020-03-13
437,FC Bayern Munich,0,,,,,,,,,,,,,20,3,2020,2020-03-20
438,Zalgiris Kaunas,0,,,,,,,,,,,,,24,3,2020,2020-03-24
439,CSKA Moscow,1,,,,,,,,,,,,,26,3,2020,2020-03-26
440,Olympiacos Piraeus,1,,,,,,,,,,,,,2,4,2020,2020-04-02
441,Anadolu Efes Istanbul,0,,,,,,,,,,,,,10,4,2020,2020-04-10
515,FC Barcelona,0,,,,,,,,,,,,,4,11,2022,2022-11-04


In [18]:
# Rows where Turnovers is missing (this filter checks Turnovers only, not every column).
df[df["Turnovers"].isna()]

Unnamed: 0,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Day,Month,Year,Date
58,Pamesa Valencia,0,72.0,68.0,64.9,21.7,64.3,7.0,13.0,20.0,15.0,15.0,14.0,,8,4,2004,2004-04-08
122,Real Madrid,0,77.0,82.0,59.1,20.0,72.2,9.0,19.0,28.0,17.0,12.0,12.0,,13,12,2007,2007-12-13
354,Anadolu Efes Istanbul,0,89.0,107.0,60.0,50.0,76.9,8.0,21.0,29.0,22.0,6.0,11.0,,19,1,2018,2018-01-19
413,Zalgiris Kaunas,1,76.0,67.0,47.4,38.1,61.5,10.0,13.0,23.0,15.0,7.0,12.0,,1,11,2019,2019-11-01
433,Valencia Basket,0,93.0,109.0,75.0,56.0,90.0,3.0,21.0,24.0,24.0,8.0,13.0,,28,2,2020,2020-02-28
436,Panathinaikos OPAP Athens,1,,,,,,,,,,,,,13,3,2020,2020-03-13
437,FC Bayern Munich,0,,,,,,,,,,,,,20,3,2020,2020-03-20
438,Zalgiris Kaunas,0,,,,,,,,,,,,,24,3,2020,2020-03-24
439,CSKA Moscow,1,,,,,,,,,,,,,26,3,2020,2020-03-26
440,Olympiacos Piraeus,1,,,,,,,,,,,,,2,4,2020,2020-04-02


In [19]:
# Fill missing Turnovers values with the column's mode (the first one if there are ties).
# NOTE(review): in the rows that have stats but no Turnovers, the displayed Blocks
# values are 11-14; this may be a column shift in the source CSVs (Turnovers
# landing under Blocks) - verify against the raw files before trusting this fill.
turnovers_mode = df["Turnovers"].mode()[0]
df["Turnovers"] = df["Turnovers"].fillna(turnovers_mode)

In [20]:
# Re-display the rows without Points after the Turnovers fill.
df[df["Points"].isna()]

Unnamed: 0,Opposing Team,IsHome,Points,Performance Index Rating,Two-point %,Three-point %,Free-throw %,Offensive rebounds,Defensive rebounds,Total rebounds,Assists,Steals,Blocks,Turnovers,Day,Month,Year,Date
436,Panathinaikos OPAP Athens,1,,,,,,,,,,,,12.0,13,3,2020,2020-03-13
437,FC Bayern Munich,0,,,,,,,,,,,,12.0,20,3,2020,2020-03-20
438,Zalgiris Kaunas,0,,,,,,,,,,,,12.0,24,3,2020,2020-03-24
439,CSKA Moscow,1,,,,,,,,,,,,12.0,26,3,2020,2020-03-26
440,Olympiacos Piraeus,1,,,,,,,,,,,,12.0,2,4,2020,2020-04-02
441,Anadolu Efes Istanbul,0,,,,,,,,,,,,12.0,10,4,2020,2020-04-10
515,FC Barcelona,0,,,,,,,,,,,,12.0,4,11,2022,2022-11-04


In [21]:
col_for_mode = ['Points', "Performance Index Rating", "Two-point %", "Three-point %", "Free-throw %", "Offensive rebounds",
                "Defensive rebounds", "Total rebounds", "Assists", "Steals", "Blocks"]

# Some rows have every one of these stats missing (presumably matches with no
# recorded box score - confirm against the source data). Filling them with column
# modes would fabricate whole matches, including the target column
# "Performance Index Rating", so remove them instead of imputing them.
no_stats_mask = df[col_for_mode].isna().all(axis=1)
df = df.loc[~no_stats_mask].reset_index(drop=True)

# Any gaps left in otherwise complete rows are filled with the column's mode
# (the first mode when there are ties).
for col in col_for_mode:
    if df[col].isna().any():
        df[col] = df[col].fillna(df[col].mode()[0])

In [22]:
# Count the missing values left in each column.
df.isna().sum()

Opposing Team               0
IsHome                      0
Points                      0
Performance Index Rating    0
Two-point %                 0
Three-point %               0
Free-throw %                0
Offensive rebounds          0
Defensive rebounds          0
Total rebounds              0
Assists                     0
Steals                      0
Blocks                      0
Turnovers                   0
Day                         0
Month                       0
Year                        0
Date                        0
dtype: int64