In [6]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# 1. Load the source tables
footballers = pd.read_csv('../raw_data/footballers.csv')
football_teams = pd.read_csv('../processed_data/football_teams_cleaned.csv')
leagues = pd.read_csv('../raw_data/leagues.csv')

# 2. Join the tables on their shared key columns
#    (pd.merge defaults to an inner join, so rows without a match are dropped)
teams_with_leagues = pd.merge(football_teams, leagues, on=['league_id', 'league_name'])
merged_data = pd.merge(footballers, teams_with_leagues, on=['team_id', 'league_id', 'league_name'])

# 3. Columns that every output table carries
common_columns = [
    'footballer_id', 'footballer_name', 'league_id', 'league_name', 'league_logo_path',
    'team_id', 'club', 'img_path', 'birthday', 'nationality_img_path',
    'footballer_img_path'
]

# 4. Synthetic 'created_at' and 'timestamp' values
num_records = len(merged_data)

# Read the clock once so every row is derived from the same instant
generated_at = datetime.now()

# Use a dedicated seeded generator for the day offsets: they become repeatable
# between runs and the global np.random stream used further down is not consumed.
offset_rng = np.random.default_rng(42)
created_at = [
    generated_at - timedelta(days=int(days_back))
    for days_back in offset_rng.integers(1, 1000, size=num_records)  # 1..999 days back
]
timestamp = [generated_at.strftime('%Y-%m-%d %H:%M:%S')] * num_records

# 5. Assemble the shared frame
common_data = merged_data[common_columns].copy()
common_data['created_at'] = created_at
common_data['timestamp'] = timestamp

# 6. Add the synthetic measurement columns

# Physical table: one uniform draw per row for each (column, low, high) entry.
# Entries are listed in draw order, which is also the resulting column order.
np.random.seed(42)  # fixed seed for reproducibility
physical_data = common_data.assign(**{
    column: np.random.uniform(low, high, num_records)
    for column, low, high in (
        ('muscle_mass', 50, 100),              # kg
        ('muscle_strength', 200, 500),         # kg
        ('muscle_endurance', 1, 10),           # level
        ('flexibility', 1, 10),                # level
        ('weight', 60, 100),                   # kg
        ('body_fat_percentage', 10, 25),       # %
        ('heights', 1.6, 2.1),                 # meters
        ('thigh_circumference', 50, 70),       # cm
        ('shoulder_circumference', 100, 140),  # cm
        ('arm_circumference', 30, 50),         # cm
        ('chest_circumference', 80, 120),      # cm
        ('back_circumference', 80, 110),       # cm
        ('waist_circumference', 60, 100),      # cm
        ('leg_circumference', 40, 60),         # cm
        ('calf_circumference', 30, 50),        # cm
    )
})

# Conditioning table
# Starts from the shared columns plus the strength column already generated for
# the physical table; the keyword arguments are evaluated left to right, so the
# random draws happen in the order the columns are listed.
conditional_data = common_data.assign(
    muscle_strength=physical_data['muscle_strength'],           # taken from the physical table
    VO2_max=np.random.uniform(40, 70, num_records),             # ml/kg/min
    lactate_levels=np.random.uniform(1, 5, num_records),        # mmol/L
    training_intensity=np.random.uniform(1, 10, num_records),   # level
    recovery_times=np.random.randint(24, 72, num_records),      # hours
    current_VO2_max=np.random.uniform(30, 60, num_records),     # ml/kg/min
    current_lactate_levels=np.random.uniform(1, 5, num_records),      # mmol/L
    current_muscle_strength=np.random.uniform(200, 500, num_records),  # kg
)

# Targets: VO2 max and strength are the generated value plus a random gain,
# the lactate target is an independent draw.
conditional_data['target_VO2_max'] = conditional_data['VO2_max'].add(
    np.random.uniform(1, 5, num_records)
)
conditional_data['target_lactate_level'] = np.random.uniform(1, 3, num_records)
conditional_data['target_muscle_strength'] = conditional_data['muscle_strength'].add(
    np.random.uniform(5, 50, num_records)
)

# Endurance table: shared columns plus per-row random session metrics.
# Keyword arguments are evaluated left to right, which fixes the draw order.
endurance_data = common_data.assign(
    running_distance=np.random.uniform(3, 15, num_records),    # km
    average_speed=np.random.uniform(5, 15, num_records),       # km/h
    heart_rate=np.random.uniform(60, 200, num_records),        # bpm
    zones_x=np.random.randint(1, 5, num_records),              # zones
    training_intensity=np.random.uniform(1, 10, num_records),  # level
    session=np.random.randint(1, 100, num_records),            # session ID
)

# 7. Write the three tables to CSV (row index is not written)
physical_data.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_data.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_data.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

# Confirmation message shown once all three files are written
print("Veriler başarıyla oluşturuldu ve kaydedildi!")


Veriler başarıyla oluşturuldu ve kaydedildi!


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)
SEED = 42

# Seed NumPy's global generator so that re-running this cell from the top
# reproduces exactly the same synthetic data.
np.random.seed(SEED)

# Build the list of days from START_DATE through END_DATE (both inclusive)
num_days = (END_DATE - START_DATE).days + 1
dates = [START_DATE + timedelta(days=day_offset) for day_offset in range(num_days)]

# Helper functions for synthetic data generation
def generate_physical_data(footballer_id, date):
    """Build one synthetic physical-measurement record.

    Each measurement is an independent uniform draw from NumPy's global
    generator, rounded to two decimals. Draws are made in the order the
    measurements are listed below, which is also the key order of the result.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Value stored unchanged under both "created_at" and "timestamp".

    Returns:
        dict: "footballer_id", the fifteen measurements, "created_at", "timestamp".
    """
    measurement_ranges = (
        ("muscle_mass", 60, 90),
        ("muscle_strength", 70, 100),
        ("muscle_endurance", 50, 80),
        ("flexibility", 30, 50),
        ("weight", 70, 100),
        ("body_fat_percentage", 10, 20),
        ("heights", 170, 200),
        ("thigh_circumference", 50, 60),
        ("shoulder_circumference", 100, 120),
        ("arm_circumference", 30, 40),
        ("chest_circumference", 90, 110),
        ("back_circumference", 80, 100),
        ("waist_circumference", 70, 90),
        ("leg_circumference", 50, 60),
        ("calf_circumference", 30, 40),
    )
    record = {"footballer_id": footballer_id}
    for measurement, low, high in measurement_ranges:
        record[measurement] = round(np.random.uniform(low, high), 2)
    record["created_at"] = date
    record["timestamp"] = date
    return record

def generate_conditional_data(footballer_id, date):
    """Build one synthetic conditioning record.

    Each metric is an independent uniform draw from NumPy's global generator,
    rounded to two decimals, drawn in the order listed below.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Value stored unchanged under both "created_at" and "timestamp".

    Returns:
        dict: "footballer_id", the ten metrics, "created_at", "timestamp".
    """
    metric_ranges = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )
    record = {"footballer_id": footballer_id}
    for metric, low, high in metric_ranges:
        record[metric] = round(np.random.uniform(low, high), 2)
    record["created_at"] = date
    record["timestamp"] = date
    return record

def generate_endurance_data(footballer_id, date):
    """Build one synthetic endurance-session record.

    Values come from NumPy's global generator and are drawn in the same order
    as the keys of the returned dict. The randint upper bounds are exclusive,
    so heart_rate is 120..179, peak_heart_rate 180..199 and session 1 or 2.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Value stored unchanged under both "created_at" and "timestamp".

    Returns:
        dict: One endurance record.
    """
    distance = round(np.random.uniform(5, 15), 2)
    speed = round(np.random.uniform(5, 10), 2)
    mean_rate = np.random.randint(120, 180)
    peak_rate = np.random.randint(180, 200)
    intensity = round(np.random.uniform(70, 90), 2)
    session_no = np.random.randint(1, 3)

    record = {"footballer_id": footballer_id}
    record["running_distance"] = distance
    record["average_speed"] = speed
    record["heart_rate"] = mean_rate
    record["peak_heart_rate"] = peak_rate
    record["training_intensity"] = intensity
    record["session"] = session_no
    record["created_at"] = date
    record["timestamp"] = date
    return record

# Generate one record of each kind per footballer per day
physical_data, conditional_data, endurance_data = [], [], []

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # All three generators draw from the shared global RNG, so the
        # physical -> conditional -> endurance call order is kept as is.
        physical_data.append(generate_physical_data(footballer_id, date))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Turn each list of records into a DataFrame
physical_df = pd.DataFrame(data=physical_data)
conditional_df = pd.DataFrame(data=conditional_data)
endurance_df = pd.DataFrame(data=endurance_data)

# Write the frames to CSV (row index is not written)
physical_df.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_df.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_df.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")

Veriler başarıyla oluşturuldu ve kaydedildi.


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)
SEED = 42

# Seed NumPy's global generator so that re-running this cell from the top
# reproduces exactly the same synthetic data.
np.random.seed(SEED)

# Build the list of days from START_DATE through END_DATE (both inclusive)
num_days = (END_DATE - START_DATE).days + 1
dates = [START_DATE + timedelta(days=day_offset) for day_offset in range(num_days)]

# Helper functions for synthetic data generation
def generate_physical_data(footballer_id, date, trend_data):
    """Build one physical-measurement record around a footballer's baseline.

    A single random offset in [-0.2, 0.2] is drawn and added to every
    measurement except "heights", which is reported without an offset. The
    footballer's trend decides the sign of that offset: "increase" makes it
    non-negative, "decrease" makes it non-positive, any other value leaves
    it as drawn.

    Args:
        footballer_id: Key looked up in ``trend_data``; also stored in the record.
        date: datetime of the measurement. Stored as-is under "created_at" and
            as integer UNIX seconds under "timestamp" (same convention as the
            conditional and endurance generators of this cell).
        trend_data: Mapping of footballer id to ``{"trend": str, "base": dict}``.
            Unknown ids fall back to a stable trend and the default baselines.

    Returns:
        dict: One record with values rounded to two decimals.
    """
    trend = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    base = trend.get("base", {})
    progression = np.random.uniform(-0.2, 0.2)  # random day-level variation

    # Apply the configured direction; previously the "trend" entry was ignored.
    direction = trend.get("trend", "stable")
    if direction == "increase":
        progression = abs(progression)
    elif direction == "decrease":
        progression = -abs(progression)

    return {
        "footballer_id": footballer_id,
        "muscle_mass": round(base.get("muscle_mass", 75) + progression, 2),
        "muscle_strength": round(base.get("muscle_strength", 85) + progression, 2),
        "muscle_endurance": round(base.get("muscle_endurance", 65) + progression, 2),
        "flexibility": round(base.get("flexibility", 40) + progression, 2),
        "weight": round(base.get("weight", 85) + progression, 2),
        "body_fat_percentage": round(base.get("body_fat_percentage", 15) + progression, 2),
        "heights": round(base.get("heights", 185), 2),  # height stays constant
        "thigh_circumference": round(base.get("thigh_circumference", 55) + progression, 2),
        "shoulder_circumference": round(base.get("shoulder_circumference", 110) + progression, 2),
        "arm_circumference": round(base.get("arm_circumference", 35) + progression, 2),
        "chest_circumference": round(base.get("chest_circumference", 100) + progression, 2),
        "back_circumference": round(base.get("back_circumference", 90) + progression, 2),
        "waist_circumference": round(base.get("waist_circumference", 80) + progression, 2),
        "leg_circumference": round(base.get("leg_circumference", 55) + progression, 2),
        "calf_circumference": round(base.get("calf_circumference", 35) + progression, 2),
        "created_at": date,
        "timestamp": int(date.timestamp()),  # UNIX seconds
    }

def generate_conditional_data(footballer_id, date):
    """Build one synthetic conditioning record.

    Each metric is an independent uniform draw from NumPy's global generator,
    rounded to two decimals, drawn in the order listed below.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: datetime stored as-is under "created_at" and as integer UNIX
            seconds under "timestamp".

    Returns:
        dict: "footballer_id", the ten metrics, "created_at", "timestamp".
    """
    metric_ranges = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )
    record = {"footballer_id": footballer_id}
    for metric, low, high in metric_ranges:
        record[metric] = round(np.random.uniform(low, high), 2)
    record["created_at"] = date
    record["timestamp"] = int(date.timestamp())  # UNIX seconds
    return record

def generate_endurance_data(footballer_id, date):
    """Build one synthetic endurance-session record.

    Values come from NumPy's global generator and are drawn in the same order
    as the keys of the returned dict. The randint upper bounds are exclusive,
    so heart_rate is 120..179, peak_heart_rate 180..199 and session 1 or 2.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: datetime stored as-is under "created_at" and as integer UNIX
            seconds under "timestamp".

    Returns:
        dict: One endurance record.
    """
    distance = round(np.random.uniform(5, 15), 2)
    speed = round(np.random.uniform(5, 10), 2)
    mean_rate = np.random.randint(120, 180)
    peak_rate = np.random.randint(180, 200)
    intensity = round(np.random.uniform(70, 90), 2)
    session_no = np.random.randint(1, 3)

    record = {"footballer_id": footballer_id}
    record["running_distance"] = distance
    record["average_speed"] = speed
    record["heart_rate"] = mean_rate
    record["peak_heart_rate"] = peak_rate
    record["training_intensity"] = intensity
    record["session"] = session_no
    record["created_at"] = date
    record["timestamp"] = int(date.timestamp())  # UNIX seconds
    return record

# Trend setup: every footballer gets a direction and a random baseline.
# Odd ids are tagged "increase", even ids "decrease"; baselines are uniform
# draws made in the order the measurements are listed.
trend_data = {
    player_id: {
        "trend": "increase" if player_id % 2 else "decrease",
        "base": {
            measurement: np.random.uniform(low, high)
            for measurement, low, high in (
                ("muscle_mass", 60, 90),
                ("muscle_strength", 70, 100),
                ("muscle_endurance", 50, 80),
                ("flexibility", 30, 50),
                ("weight", 70, 100),
                ("body_fat_percentage", 10, 20),
                ("thigh_circumference", 50, 60),
                ("shoulder_circumference", 100, 120),
                ("arm_circumference", 30, 40),
                ("chest_circumference", 90, 110),
                ("back_circumference", 80, 100),
                ("waist_circumference", 70, 90),
                ("leg_circumference", 50, 60),
                ("calf_circumference", 30, 40),
            )
        },
    }
    for player_id in range(1, NUM_FOOTBALLERS + 1)
}

# Generate one record of each kind per footballer per day
physical_data, conditional_data, endurance_data = [], [], []

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # All three generators draw from the shared global RNG, so the
        # physical -> conditional -> endurance call order is kept as is.
        physical_data.append(generate_physical_data(footballer_id, date, trend_data))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Turn each list of records into a DataFrame
physical_df = pd.DataFrame(data=physical_data)
conditional_df = pd.DataFrame(data=conditional_data)
endurance_df = pd.DataFrame(data=endurance_data)

# Write the frames to CSV (row index is not written)
physical_df.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_df.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_df.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")


Veriler başarıyla oluşturuldu ve kaydedildi.


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)
SEED = 42

# Seed NumPy's global generator so that re-running this cell from the top
# reproduces exactly the same synthetic data.
np.random.seed(SEED)

# Build the list of days from START_DATE through END_DATE (both inclusive)
num_days = (END_DATE - START_DATE).days + 1
dates = [START_DATE + timedelta(days=day_offset) for day_offset in range(num_days)]

# Assign a random trend and baseline to every footballer
def assign_trends(num_footballers):
    """Create a random trend profile for footballer ids 1..num_footballers.

    For each id, a trend label is picked with ``np.random.choice`` and then one
    uniform baseline per measurement is drawn, in the order listed below.

    Args:
        num_footballers: Number of footballers; ids run from 1 to this value.

    Returns:
        dict: ``{footballer_id: {"trend": label, "base": {measurement: value}}}``.
    """
    trend_labels = ["increase", "decrease", "stable", "fluctuate"]
    baseline_ranges = (
        ("muscle_mass", 60, 90),
        ("muscle_strength", 70, 100),
        ("muscle_endurance", 50, 80),
        ("flexibility", 30, 50),
        ("weight", 70, 100),
        ("body_fat_percentage", 10, 20),
        ("thigh_circumference", 50, 60),
        ("shoulder_circumference", 100, 120),
        ("arm_circumference", 30, 40),
        ("chest_circumference", 90, 110),
        ("back_circumference", 80, 100),
        ("waist_circumference", 70, 90),
        ("leg_circumference", 50, 60),
        ("calf_circumference", 30, 40),
    )
    profiles = {}
    for player_id in range(1, num_footballers + 1):
        # The label is drawn before the baselines to keep the RNG sequence fixed
        label = np.random.choice(trend_labels)
        baseline = {
            measurement: np.random.uniform(low, high)
            for measurement, low, high in baseline_ranges
        }
        profiles[player_id] = {"trend": label, "base": baseline}
    return profiles

# Trend-driven physical record
def generate_physical_data(footballer_id, date, trend_data):
    """Build one physical-measurement record around a footballer's baseline.

    One offset is drawn uniformly from [-0.5, 0.5] and added to every
    measurement. The trend label shapes it: "increase" takes its absolute
    value, "decrease" the negated absolute value, "fluctuate" multiplies it by
    a randomly chosen -1 or 1, and any other label leaves it untouched.
    NOTE(review): the offset is already symmetric around zero, so the
    "fluctuate" sign flip does not look like it changes its distribution.

    Args:
        footballer_id: Key looked up in ``trend_data``; also stored in the record.
        date: datetime stored as-is under "created_at"; its time of day,
            formatted to millisecond precision, is stored under "timestamp".
        trend_data: Mapping of footballer id to ``{"trend": ..., "base": {...}}``.
            Unknown ids use a stable trend and the default baselines.

    Returns:
        dict: One record with measurements rounded to two decimals.
    """
    profile = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    baseline = profile["base"]
    offset = np.random.uniform(-0.5, 0.5)
    direction = profile["trend"]
    if direction == "increase":
        offset = abs(offset)
    elif direction == "decrease":
        offset = -abs(offset)
    elif direction == "fluctuate":
        offset *= np.random.choice([-1, 1])

    # (measurement, fallback baseline used when the profile has no value)
    fallbacks = (
        ("muscle_mass", 75),
        ("muscle_strength", 85),
        ("muscle_endurance", 65),
        ("flexibility", 40),
        ("weight", 85),
        ("body_fat_percentage", 15),
        ("thigh_circumference", 55),
        ("shoulder_circumference", 110),
        ("arm_circumference", 35),
        ("chest_circumference", 100),
        ("back_circumference", 90),
        ("waist_circumference", 80),
        ("leg_circumference", 55),
        ("calf_circumference", 35),
    )
    record = {"footballer_id": footballer_id}
    for measurement, default in fallbacks:
        record[measurement] = round(baseline.get(measurement, default) + offset, 2)
    record["created_at"] = date
    record["timestamp"] = date.strftime("%H:%M:%S.%f")[:-3]  # drop microseconds to milliseconds
    return record

# Conditioning record
def generate_conditional_data(footballer_id, date):
    """Build one synthetic conditioning record.

    Each metric is an independent uniform draw from NumPy's global generator,
    rounded to two decimals, drawn in the order listed below.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: datetime stored as-is under "created_at"; its time of day,
            formatted to millisecond precision, is stored under "timestamp".

    Returns:
        dict: "footballer_id", the ten metrics, "created_at", "timestamp".
    """
    metric_ranges = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )
    record = {"footballer_id": footballer_id}
    for metric, low, high in metric_ranges:
        record[metric] = round(np.random.uniform(low, high), 2)
    record["created_at"] = date
    record["timestamp"] = date.strftime("%H:%M:%S.%f")[:-3]  # time of day, milliseconds
    return record

def generate_endurance_data(footballer_id, date):
    """Build one synthetic endurance-session record.

    Values come from NumPy's global generator and are drawn in the same order
    as the keys of the returned dict. The randint upper bounds are exclusive,
    so heart_rate is 120..179, peak_heart_rate 180..199 and session 1 or 2.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: datetime stored as-is under "created_at"; its time of day,
            formatted to millisecond precision, is stored under "timestamp".

    Returns:
        dict: One endurance record.
    """
    distance = round(np.random.uniform(5, 15), 2)
    speed = round(np.random.uniform(5, 10), 2)
    mean_rate = np.random.randint(120, 180)
    peak_rate = np.random.randint(180, 200)
    intensity = round(np.random.uniform(70, 90), 2)
    session_no = np.random.randint(1, 3)

    record = {"footballer_id": footballer_id}
    record["running_distance"] = distance
    record["average_speed"] = speed
    record["heart_rate"] = mean_rate
    record["peak_heart_rate"] = peak_rate
    record["training_intensity"] = intensity
    record["session"] = session_no
    record["created_at"] = date
    record["timestamp"] = date.strftime("%H:%M:%S.%f")[:-3]  # time of day, milliseconds
    return record

# Draw the trend profiles first, then one record of each kind per footballer per day
trend_data = assign_trends(NUM_FOOTBALLERS)
physical_data, conditional_data, endurance_data = [], [], []

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # All three generators draw from the shared global RNG, so the
        # physical -> conditional -> endurance call order is kept as is.
        physical_data.append(generate_physical_data(footballer_id, date, trend_data))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Turn each list of records into a DataFrame
physical_df = pd.DataFrame(data=physical_data)
conditional_df = pd.DataFrame(data=conditional_data)
endurance_df = pd.DataFrame(data=endurance_data)

# Write the frames to CSV (row index is not written)
physical_df.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_df.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_df.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")


Veriler başarıyla oluşturuldu ve kaydedildi.


In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)
SEED = 42

# Seed NumPy's global generator so that re-running this cell from the top
# reproduces exactly the same synthetic data.
np.random.seed(SEED)

# Build the list of days from START_DATE through END_DATE (both inclusive)
num_days = (END_DATE - START_DATE).days + 1
dates = [START_DATE + timedelta(days=day_offset) for day_offset in range(num_days)]

# Assign a random trend and baseline to every footballer
def assign_trends(num_footballers):
    """Create a random trend profile for footballer ids 1..num_footballers.

    For each id, a trend label is picked with ``np.random.choice`` and then one
    uniform baseline per measurement is drawn, in the order listed below.

    Args:
        num_footballers: Number of footballers; ids run from 1 to this value.

    Returns:
        dict: ``{footballer_id: {"trend": label, "base": {measurement: value}}}``.
    """
    trend_labels = ["increase", "decrease", "stable", "fluctuate"]
    baseline_ranges = (
        ("muscle_mass", 60, 90),
        ("muscle_strength", 70, 100),
        ("muscle_endurance", 50, 80),
        ("flexibility", 30, 50),
        ("weight", 70, 100),
        ("body_fat_percentage", 10, 20),
        ("thigh_circumference", 50, 60),
        ("shoulder_circumference", 100, 120),
        ("arm_circumference", 30, 40),
        ("chest_circumference", 90, 110),
        ("back_circumference", 80, 100),
        ("waist_circumference", 70, 90),
        ("leg_circumference", 50, 60),
        ("calf_circumference", 30, 40),
    )
    profiles = {}
    for player_id in range(1, num_footballers + 1):
        # The label is drawn before the baselines to keep the RNG sequence fixed
        label = np.random.choice(trend_labels)
        baseline = {
            measurement: np.random.uniform(low, high)
            for measurement, low, high in baseline_ranges
        }
        profiles[player_id] = {"trend": label, "base": baseline}
    return profiles

# Pick a random time of day for a given date
def random_time_on_date(date):
    """Return ``date`` shifted forward by a random time-of-day offset.

    The hour is drawn from 6..21, minute and second from 0..59 and the
    microsecond from 0..999999 (randint upper bounds are exclusive), in that
    order. For a midnight ``date`` the result lies between 06:00 and 22:00.

    Args:
        date: datetime the offset is added to.

    Returns:
        datetime: ``date`` plus the drawn offset.
    """
    offset = timedelta(hours=np.random.randint(6, 22))
    offset += timedelta(minutes=np.random.randint(0, 60))
    offset += timedelta(seconds=np.random.randint(0, 60))
    offset += timedelta(microseconds=np.random.randint(0, 1_000_000))
    return date + offset

# Trend-driven physical record
def generate_physical_data(footballer_id, date, trend_data):
    """Build one physical-measurement record around a footballer's baseline.

    One offset is drawn uniformly from [-0.5, 0.5] and added to every
    measurement. The trend label shapes it: "increase" takes its absolute
    value, "decrease" the negated absolute value, "fluctuate" multiplies it by
    a randomly chosen -1 or 1, and any other label leaves it untouched.
    NOTE(review): the offset is already symmetric around zero, so the
    "fluctuate" sign flip does not look like it changes its distribution.

    Args:
        footballer_id: Key looked up in ``trend_data``; also stored in the record.
        date: Day of the measurement; a random time of day is added via
            ``random_time_on_date``. The resulting datetime is stored under
            "created_at" and its time of day (milliseconds) under "timestamp".
        trend_data: Mapping of footballer id to ``{"trend": ..., "base": {...}}``.
            Unknown ids use a stable trend and the default baselines.

    Returns:
        dict: One record with measurements rounded to two decimals.
    """
    profile = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    baseline = profile["base"]
    offset = np.random.uniform(-0.5, 0.5)
    direction = profile["trend"]
    if direction == "increase":
        offset = abs(offset)
    elif direction == "decrease":
        offset = -abs(offset)
    elif direction == "fluctuate":
        offset *= np.random.choice([-1, 1])

    measured_at = random_time_on_date(date)

    # (measurement, fallback baseline used when the profile has no value)
    fallbacks = (
        ("muscle_mass", 75),
        ("muscle_strength", 85),
        ("muscle_endurance", 65),
        ("flexibility", 40),
        ("weight", 85),
        ("body_fat_percentage", 15),
        ("thigh_circumference", 55),
        ("shoulder_circumference", 110),
        ("arm_circumference", 35),
        ("chest_circumference", 100),
        ("back_circumference", 90),
        ("waist_circumference", 80),
        ("leg_circumference", 55),
        ("calf_circumference", 35),
    )
    record = {"footballer_id": footballer_id}
    for measurement, default in fallbacks:
        record[measurement] = round(baseline.get(measurement, default) + offset, 2)
    record["created_at"] = measured_at
    record["timestamp"] = measured_at.strftime("%H:%M:%S.%f")[:-3]  # time of day, milliseconds
    return record

# Conditioning record
def generate_conditional_data(footballer_id, date):
    """Build one synthetic conditioning record.

    A random time of day is first added to ``date`` via
    ``random_time_on_date``; then each metric is an independent uniform draw,
    rounded to two decimals, in the order listed below.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Day of the record.

    Returns:
        dict: "footballer_id", the ten metrics, "created_at" (the randomized
        datetime) and "timestamp" (its time of day with milliseconds).
    """
    recorded_at = random_time_on_date(date)
    metric_ranges = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )
    record = {"footballer_id": footballer_id}
    for metric, low, high in metric_ranges:
        record[metric] = round(np.random.uniform(low, high), 2)
    record["created_at"] = recorded_at
    record["timestamp"] = recorded_at.strftime("%H:%M:%S.%f")[:-3]
    return record

def generate_endurance_data(footballer_id, date):
    """Build one synthetic endurance-session record.

    A random time of day is first added to ``date`` via
    ``random_time_on_date``; the metrics are then drawn in the same order as
    the keys of the returned dict. The randint upper bounds are exclusive, so
    heart_rate is 120..179, peak_heart_rate 180..199 and session 1 or 2.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Day of the session.

    Returns:
        dict: One endurance record; "created_at" holds the randomized datetime
        and "timestamp" its time of day with milliseconds.
    """
    recorded_at = random_time_on_date(date)
    distance = round(np.random.uniform(5, 15), 2)
    speed = round(np.random.uniform(5, 10), 2)
    mean_rate = np.random.randint(120, 180)
    peak_rate = np.random.randint(180, 200)
    intensity = round(np.random.uniform(70, 90), 2)
    session_no = np.random.randint(1, 3)

    record = {"footballer_id": footballer_id}
    record["running_distance"] = distance
    record["average_speed"] = speed
    record["heart_rate"] = mean_rate
    record["peak_heart_rate"] = peak_rate
    record["training_intensity"] = intensity
    record["session"] = session_no
    record["created_at"] = recorded_at
    record["timestamp"] = recorded_at.strftime("%H:%M:%S.%f")[:-3]
    return record

# Draw the trend profiles first, then one record of each kind per footballer per day
trend_data = assign_trends(NUM_FOOTBALLERS)
physical_data, conditional_data, endurance_data = [], [], []

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # All three generators draw from the shared global RNG, so the
        # physical -> conditional -> endurance call order is kept as is.
        physical_data.append(generate_physical_data(footballer_id, date, trend_data))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Turn each list of records into a DataFrame
physical_df = pd.DataFrame(data=physical_data)
conditional_df = pd.DataFrame(data=conditional_data)
endurance_df = pd.DataFrame(data=endurance_data)

# Write the frames to CSV (row index is not written)
physical_df.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_df.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_df.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")


Veriler başarıyla oluşturuldu ve kaydedildi.


In [6]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)
SEED = 42

# Seed NumPy's global generator so that re-running this cell from the top
# reproduces exactly the same synthetic data.
np.random.seed(SEED)

# Build the list of days from START_DATE through END_DATE (both inclusive)
num_days = (END_DATE - START_DATE).days + 1
dates = [START_DATE + timedelta(days=day_offset) for day_offset in range(num_days)]

# Assign a random trend and baseline to every footballer
def assign_trends(num_footballers):
    """Create a random trend profile for footballer ids 1..num_footballers.

    For each id, a trend label is picked with ``np.random.choice`` and then one
    uniform baseline per measurement is drawn, in the order listed below.

    Args:
        num_footballers: Number of footballers; ids run from 1 to this value.

    Returns:
        dict: ``{footballer_id: {"trend": label, "base": {measurement: value}}}``.
    """
    trend_labels = ["increase", "decrease", "stable", "fluctuate"]
    baseline_ranges = (
        ("muscle_mass", 60, 90),
        ("muscle_strength", 70, 100),
        ("muscle_endurance", 50, 80),
        ("flexibility", 30, 50),
        ("weight", 70, 100),
        ("body_fat_percentage", 10, 20),
        ("thigh_circumference", 50, 60),
        ("shoulder_circumference", 100, 120),
        ("arm_circumference", 30, 40),
        ("chest_circumference", 90, 110),
        ("back_circumference", 80, 100),
        ("waist_circumference", 70, 90),
        ("leg_circumference", 50, 60),
        ("calf_circumference", 30, 40),
    )
    profiles = {}
    for player_id in range(1, num_footballers + 1):
        # The label is drawn before the baselines to keep the RNG sequence fixed
        label = np.random.choice(trend_labels)
        baseline = {
            measurement: np.random.uniform(low, high)
            for measurement, low, high in baseline_ranges
        }
        profiles[player_id] = {"trend": label, "base": baseline}
    return profiles

# Pick a random time of day for a given date
def random_time_on_date(date):
    """Return ``date`` shifted forward by a random time-of-day offset.

    The hour is drawn from 6..21, minute and second from 0..59 and the
    microsecond from 0..999999 (randint upper bounds are exclusive), in that
    order. For a midnight ``date`` the result lies between 06:00 and 22:00.

    Args:
        date: datetime the offset is added to.

    Returns:
        datetime: ``date`` plus the drawn offset.
    """
    offset = timedelta(hours=np.random.randint(6, 22))
    offset += timedelta(minutes=np.random.randint(0, 60))
    offset += timedelta(seconds=np.random.randint(0, 60))
    offset += timedelta(microseconds=np.random.randint(0, 1_000_000))
    return date + offset

# Trend-driven physical record
def generate_physical_data(footballer_id, date, trend_data):
    """Build one physical-measurement record around a footballer's baseline.

    One offset is drawn uniformly from [-0.5, 0.5] and added to every
    measurement. The trend label shapes it: "increase" takes its absolute
    value, "decrease" the negated absolute value, "fluctuate" multiplies it by
    a randomly chosen -1 or 1, and any other label leaves it untouched.
    NOTE(review): the offset is already symmetric around zero, so the
    "fluctuate" sign flip does not look like it changes its distribution.

    Args:
        footballer_id: Key looked up in ``trend_data``; also stored in the record.
        date: Day of the measurement; a random time of day is added via
            ``random_time_on_date``. "created_at" holds the resulting day as a
            "YYYY-MM-DD" string and "timestamp" the full datetime.
        trend_data: Mapping of footballer id to ``{"trend": ..., "base": {...}}``.
            Unknown ids use a stable trend and the default baselines.

    Returns:
        dict: One record with measurements rounded to two decimals.
    """
    profile = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    baseline = profile["base"]
    offset = np.random.uniform(-0.5, 0.5)
    direction = profile["trend"]
    if direction == "increase":
        offset = abs(offset)
    elif direction == "decrease":
        offset = -abs(offset)
    elif direction == "fluctuate":
        offset *= np.random.choice([-1, 1])

    measured_at = random_time_on_date(date)

    # (measurement, fallback baseline used when the profile has no value)
    fallbacks = (
        ("muscle_mass", 75),
        ("muscle_strength", 85),
        ("muscle_endurance", 65),
        ("flexibility", 40),
        ("weight", 85),
        ("body_fat_percentage", 15),
        ("thigh_circumference", 55),
        ("shoulder_circumference", 110),
        ("arm_circumference", 35),
        ("chest_circumference", 100),
        ("back_circumference", 90),
        ("waist_circumference", 80),
        ("leg_circumference", 55),
        ("calf_circumference", 35),
    )
    record = {"footballer_id": footballer_id}
    for measurement, default in fallbacks:
        record[measurement] = round(baseline.get(measurement, default) + offset, 2)
    record["created_at"] = measured_at.strftime("%Y-%m-%d")
    record["timestamp"] = measured_at
    return record

# Conditioning record
def generate_conditional_data(footballer_id, date):
    """Build one synthetic conditioning record.

    A random time of day is first added to ``date`` via
    ``random_time_on_date``; then each metric is an independent uniform draw,
    rounded to two decimals, in the order listed below.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Day of the record.

    Returns:
        dict: "footballer_id", the ten metrics, "created_at" (the day as a
        "YYYY-MM-DD" string) and "timestamp" (the randomized datetime).
    """
    recorded_at = random_time_on_date(date)
    metric_ranges = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )
    record = {"footballer_id": footballer_id}
    for metric, low, high in metric_ranges:
        record[metric] = round(np.random.uniform(low, high), 2)
    record["created_at"] = recorded_at.strftime("%Y-%m-%d")
    record["timestamp"] = recorded_at
    return record

def generate_endurance_data(footballer_id, date):
    """Build one synthetic endurance-session record.

    A random time of day is first added to ``date`` via
    ``random_time_on_date``; the metrics are then drawn in the same order as
    the keys of the returned dict. The randint upper bounds are exclusive, so
    heart_rate is 120..179, peak_heart_rate 180..199 and session 1 or 2.

    Args:
        footballer_id: Identifier stored under "footballer_id".
        date: Day of the session.

    Returns:
        dict: One endurance record; "created_at" holds the day as a
        "YYYY-MM-DD" string and "timestamp" the randomized datetime.
    """
    recorded_at = random_time_on_date(date)
    distance = round(np.random.uniform(5, 15), 2)
    speed = round(np.random.uniform(5, 10), 2)
    mean_rate = np.random.randint(120, 180)
    peak_rate = np.random.randint(180, 200)
    intensity = round(np.random.uniform(70, 90), 2)
    session_no = np.random.randint(1, 3)

    record = {"footballer_id": footballer_id}
    record["running_distance"] = distance
    record["average_speed"] = speed
    record["heart_rate"] = mean_rate
    record["peak_heart_rate"] = peak_rate
    record["training_intensity"] = intensity
    record["session"] = session_no
    record["created_at"] = recorded_at.strftime("%Y-%m-%d")
    record["timestamp"] = recorded_at
    return record

# Draw the trend profiles first, then one record of each kind per footballer per day
trend_data = assign_trends(NUM_FOOTBALLERS)
physical_data, conditional_data, endurance_data = [], [], []

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # All three generators draw from the shared global RNG, so the
        # physical -> conditional -> endurance call order is kept as is.
        physical_data.append(generate_physical_data(footballer_id, date, trend_data))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Turn each list of records into a DataFrame
physical_df = pd.DataFrame(data=physical_data)
conditional_df = pd.DataFrame(data=conditional_data)
endurance_df = pd.DataFrame(data=endurance_data)

# Write the frames to CSV (row index is not written)
physical_df.to_csv(path_or_buf='../processed_data/physical.csv', index=False)
conditional_df.to_csv(path_or_buf='../processed_data/conditional.csv', index=False)
endurance_df.to_csv(path_or_buf='../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")


Veriler başarıyla oluşturuldu ve kaydedildi.


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Constants: number of footballers to simulate and the inclusive date window.
NUM_FOOTBALLERS = 1584
START_DATE = datetime(year=2024, month=11, day=1)
END_DATE = datetime(year=2024, month=12, day=31)

# Build one midnight datetime per calendar day; the +1 keeps END_DATE in the list.
num_days = (END_DATE - START_DATE).days + 1
dates = list(map(lambda offset: START_DATE + timedelta(days=offset), range(num_days)))

# Rastgele trendler oluştur
def assign_trends(num_footballers):
    """Draw a random trend label and baseline physical metrics per footballer.

    Args:
        num_footballers: Number of footballers; IDs run from 1 to this value
            inclusive.

    Returns:
        dict: Maps each footballer ID to ``{"trend": <label>, "base": <dict>}``.
        The label is one of "increase", "decrease", "stable" or "fluctuate";
        the base dict holds one uniformly drawn value per metric.
    """
    # (metric, low, high) bounds handed to np.random.uniform, in draw order.
    metric_bounds = (
        ("muscle_mass", 60, 90),
        ("muscle_strength", 70, 100),
        ("muscle_endurance", 50, 80),
        ("flexibility", 30, 50),
        ("weight", 70, 100),
        ("body_fat_percentage", 10, 20),
        ("thigh_circumference", 50, 60),
        ("shoulder_circumference", 100, 120),
        ("arm_circumference", 30, 40),
        ("chest_circumference", 90, 110),
        ("back_circumference", 80, 100),
        ("waist_circumference", 70, 90),
        ("leg_circumference", 50, 60),
        ("calf_circumference", 30, 40),
    )
    trend_labels = ["increase", "decrease", "stable", "fluctuate"]

    profiles = {}
    for player in range(1, num_footballers + 1):
        # The label is drawn before the baselines so the RNG call order is fixed.
        label = np.random.choice(trend_labels)
        baselines = {}
        for metric, low, high in metric_bounds:
            baselines[metric] = np.random.uniform(low, high)
        profiles[player] = {"trend": label, "base": baselines}
    return profiles

# Rastgele bir saat oluştur
def random_time_on_date(date):
    """Return ``date`` shifted by a randomly drawn time of day.

    Args:
        date: Value the random offset is added to; the callers in this cell
            pass midnight ``datetime`` objects.

    Returns:
        ``date`` plus a ``timedelta`` whose hour is in 6..21, minute and
        second in 0..59 and microsecond in 0..999999.
    """
    # np.random.randint excludes the upper bound. Draw order: hour, minute,
    # second, microsecond.
    hour = np.random.randint(6, 22)
    minute = np.random.randint(0, 60)
    second = np.random.randint(0, 60)
    microsecond = np.random.randint(0, 1_000_000)

    offset = timedelta(hours=hour)
    offset += timedelta(minutes=minute)
    offset += timedelta(seconds=second)
    offset += timedelta(microseconds=microsecond)
    return date + offset

# Trendlere dayalı fiziksel veri oluştur
def generate_physical_data(footballer_id, date, trend_data):
    """Create one synthetic physical-measurement record for a footballer and day.

    Args:
        footballer_id: ID used to look up the footballer's profile in ``trend_data``.
        date: Day of the measurement, forwarded to ``random_time_on_date``.
        trend_data: Mapping of footballer ID to ``{"trend": ..., "base": {...}}``.
            A missing ID is treated as "stable" with an empty base.

    Returns:
        dict: ``id`` (always None, left empty for the primary key),
        ``footballer_id``, fourteen metrics rounded to two decimals,
        ``created_at`` (``YYYY-MM-DD``) and ``timestamp``.
    """
    # Fallback baseline per metric, used when the profile has no value for it.
    # Tuple order is also the key order of the returned record.
    fallback_baselines = (
        ("muscle_mass", 75),
        ("muscle_strength", 85),
        ("muscle_endurance", 65),
        ("flexibility", 40),
        ("weight", 85),
        ("body_fat_percentage", 15),
        ("thigh_circumference", 55),
        ("shoulder_circumference", 110),
        ("arm_circumference", 35),
        ("chest_circumference", 100),
        ("back_circumference", 90),
        ("waist_circumference", 80),
        ("leg_circumference", 55),
        ("calf_circumference", 35),
    )

    profile = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    baselines = profile["base"]
    direction = profile["trend"]

    # A single offset in [-0.5, 0.5] is drawn per record and added to every
    # metric; the trend label only constrains (or randomly flips) its sign.
    # NOTE(review): the offset does not depend on `date`, so values do not
    # accumulate over time — confirm this is the intended meaning of "trend".
    offset = np.random.uniform(-0.5, 0.5)
    if direction == "increase":
        offset = abs(offset)
    elif direction == "decrease":
        offset = -abs(offset)
    elif direction == "fluctuate":
        offset = offset * np.random.choice([-1, 1])

    measured_at = random_time_on_date(date)

    record = {"id": None, "footballer_id": footballer_id}
    for metric, fallback in fallback_baselines:
        record[metric] = round(baselines.get(metric, fallback) + offset, 2)
    record["created_at"] = measured_at.strftime("%Y-%m-%d")
    record["timestamp"] = measured_at
    return record

# Kondisyonel veri oluştur
def generate_conditional_data(footballer_id, date):
    """Create one synthetic conditioning record for a footballer and day.

    Args:
        footballer_id: Identifier stored in the record.
        date: Day of the record, forwarded to ``random_time_on_date``.

    Returns:
        dict: ``id`` (always None, left empty for the primary key),
        ``footballer_id``, ten independently drawn metrics rounded to two
        decimals, ``created_at`` (``YYYY-MM-DD``) and ``timestamp``.
    """
    # (field, low, high) for every uniformly drawn metric, in draw/key order.
    uniform_fields = (
        ("VO2_max", 40, 60),
        ("lactate_levels", 1, 4),
        ("training_intensity", 70, 90),
        ("recovery_times", 12, 24),
        ("current_VO2_max", 40, 60),
        ("current_lactate_levels", 1, 4),
        ("current_muscle_strength", 70, 100),
        ("target_VO2_max", 50, 65),
        ("target_lactate_level", 1, 3),
        ("target_muscle_strength", 75, 105),
    )

    # The timestamp is drawn before any metric.
    recorded_at = random_time_on_date(date)

    record = {"id": None, "footballer_id": footballer_id}
    for field, low, high in uniform_fields:
        record[field] = round(np.random.uniform(low, high), 2)
    record["created_at"] = recorded_at.strftime("%Y-%m-%d")
    record["timestamp"] = recorded_at
    return record

def generate_endurance_data(footballer_id, date):
    """Create one synthetic endurance-session record for a footballer and day.

    Args:
        footballer_id: Identifier stored in the record.
        date: Day of the session, forwarded to ``random_time_on_date``.

    Returns:
        dict: ``id`` (always None, left empty for the primary key),
        ``footballer_id``, six randomly drawn session metrics, ``created_at``
        (``YYYY-MM-DD``) and ``timestamp``.
    """
    def rounded_uniform(low, high):
        """Uniform draw between ``low`` and ``high`` rounded to two decimals."""
        return round(np.random.uniform(low, high), 2)

    # Timestamp first, then the metrics in key order (fixes the RNG sequence).
    session_moment = random_time_on_date(date)
    running_distance = rounded_uniform(5, 15)
    average_speed = rounded_uniform(5, 10)
    heart_rate = np.random.randint(120, 180)  # upper bound is exclusive
    peak_heart_rate = np.random.randint(180, 200)
    training_intensity = rounded_uniform(70, 90)
    session = np.random.randint(1, 3)  # yields 1 or 2

    return {
        "id": None,
        "footballer_id": footballer_id,
        "running_distance": running_distance,
        "average_speed": average_speed,
        "heart_rate": heart_rate,
        "peak_heart_rate": peak_heart_rate,
        "training_intensity": training_intensity,
        "session": session,
        "created_at": session_moment.strftime("%Y-%m-%d"),
        "timestamp": session_moment,
    }

# Generate the data: assign every footballer a trend profile, then collect
# one record per footballer per day for each of the three tables.
trend_data = assign_trends(NUM_FOOTBALLERS)
physical_data = list()
conditional_data = list()
endurance_data = list()

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # Generator call order (physical, conditional, endurance) is kept
        # fixed because each call consumes values from the shared NumPy RNG.
        physical_data.append(generate_physical_data(footballer_id, date, trend_data))
        conditional_data.append(generate_conditional_data(footballer_id, date))
        endurance_data.append(generate_endurance_data(footballer_id, date))

# Convert the three record lists into DataFrames in one step.
physical_df, conditional_df, endurance_df = map(
    pd.DataFrame, (physical_data, conditional_data, endurance_data)
)

# Save every table as CSV (index column omitted).
physical_df.to_csv('../processed_data/physical.csv', index=False)
conditional_df.to_csv('../processed_data/conditional.csv', index=False)
endurance_df.to_csv('../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi.")



Veriler başarıyla oluşturuldu ve kaydedildi.


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Simulation settings: roster size plus the first and last day to cover.
NUM_FOOTBALLERS = 1584
START_DATE = datetime(2024, 11, 1)
END_DATE = datetime(2024, 12, 31)

# Calendar days to generate, both endpoints included (hence the 1 +).
num_days = 1 + (END_DATE - START_DATE).days
dates = []
while len(dates) < num_days:
    # len(dates) is the offset in days of the next entry from START_DATE.
    dates.append(START_DATE + timedelta(days=len(dates)))

# Rastgele trendler oluştur
def assign_trends(num_footballers):
    """Assign each footballer a random trend label and baseline metrics.

    Args:
        num_footballers: Number of footballers; IDs 1..num_footballers are used.

    Returns:
        dict: ``{footballer_id: {"trend": label, "base": {metric: value}}}``
        where ``label`` is drawn from "increase", "decrease", "stable",
        "fluctuate" and every baseline is a uniform draw within a fixed range.
    """
    def draw_profile():
        """Draw one trend label followed by the fourteen baseline values."""
        label = np.random.choice(["increase", "decrease", "stable", "fluctuate"])
        draw = np.random.uniform
        return {
            "trend": label,
            "base": {
                "muscle_mass": draw(60, 90),
                "muscle_strength": draw(70, 100),
                "muscle_endurance": draw(50, 80),
                "flexibility": draw(30, 50),
                "weight": draw(70, 100),
                "body_fat_percentage": draw(10, 20),
                "thigh_circumference": draw(50, 60),
                "shoulder_circumference": draw(100, 120),
                "arm_circumference": draw(30, 40),
                "chest_circumference": draw(90, 110),
                "back_circumference": draw(80, 100),
                "waist_circumference": draw(70, 90),
                "leg_circumference": draw(50, 60),
                "calf_circumference": draw(30, 40),
            },
        }

    # Profiles are drawn in ascending ID order, one footballer at a time.
    return {
        footballer_id: draw_profile()
        for footballer_id in range(1, num_footballers + 1)
    }

# Rastgele bir saat oluştur
def random_time_on_date(date, start_hour=6, end_hour=22):
    """Return ``date`` shifted by a random time of day inside an hour window.

    Args:
        date: Value the random offset is added to (the callers in this cell
            pass midnight ``datetime`` objects).
        start_hour: First hour that may be drawn (inclusive). Defaults to 6.
        end_hour: Hour at which the window closes (exclusive). Defaults to 22,
            so with the defaults the latest possible time is 21:59:59.999999.

    Returns:
        ``date`` plus a randomly drawn ``timedelta``.

    Raises:
        ValueError: If the window is empty or not within 0..24.
    """
    if not 0 <= start_hour < end_hour <= 24:
        raise ValueError(
            f"expected 0 <= start_hour < end_hour <= 24, got {start_hour} and {end_hour}"
        )

    # np.random.randint excludes the upper bound. Draw order: hour, minute,
    # second, microsecond.
    random_hour = np.random.randint(start_hour, end_hour)
    random_minute = np.random.randint(0, 60)
    random_second = np.random.randint(0, 60)
    random_microsecond = np.random.randint(0, 1_000_000)
    return date + timedelta(
        hours=random_hour,
        minutes=random_minute,
        seconds=random_second,
        microseconds=random_microsecond,
    )

# Trendlere dayalı fiziksel veri oluştur
def generate_physical_data(footballer_id, date, trend_data, id_counter):
    """Create one synthetic physical-measurement record and advance the ID.

    Args:
        footballer_id: ID used to look up the footballer's profile in ``trend_data``.
        date: Day of the measurement, forwarded to ``random_time_on_date``.
        trend_data: Mapping of footballer ID to ``{"trend": ..., "base": {...}}``.
            A missing ID is treated as "stable" with an empty base.
        id_counter: Value written to the record's ``id`` field.

    Returns:
        tuple: ``(record, id_counter + 1)`` where ``record`` is a dict with
        ``id``, ``footballer_id``, fourteen metrics rounded to two decimals,
        ``created_at`` (``YYYY-MM-DD``) and ``timestamp``.
    """
    # Baseline used for a metric when the footballer's profile lacks it.
    # Insertion order is the key order of the record.
    default_baselines = {
        "muscle_mass": 75,
        "muscle_strength": 85,
        "muscle_endurance": 65,
        "flexibility": 40,
        "weight": 85,
        "body_fat_percentage": 15,
        "thigh_circumference": 55,
        "shoulder_circumference": 110,
        "arm_circumference": 35,
        "chest_circumference": 100,
        "back_circumference": 90,
        "waist_circumference": 80,
        "leg_circumference": 55,
        "calf_circumference": 35,
    }

    profile = trend_data.get(footballer_id, {"trend": "stable", "base": {}})
    base = profile["base"]
    kind = profile["trend"]

    # One shift in [-0.5, 0.5] per record, applied to every metric alike.
    shift = np.random.uniform(-0.5, 0.5)
    if kind in ("increase", "decrease"):
        magnitude = abs(shift)
        shift = magnitude if kind == "increase" else -magnitude
    elif kind == "fluctuate":
        shift *= np.random.choice([-1, 1])  # random sign flip

    measured_at = random_time_on_date(date)

    metrics = {}
    for metric, default in default_baselines.items():
        metrics[metric] = round(base.get(metric, default) + shift, 2)

    physical_data = {
        "id": id_counter,  # id_counter serves as the primary key
        "footballer_id": footballer_id,
        **metrics,
        "created_at": measured_at.strftime("%Y-%m-%d"),
        "timestamp": measured_at,
    }
    return physical_data, id_counter + 1  # hand back the next free ID

# Kondisyonel veri oluştur
def generate_conditional_data(footballer_id, date, id_counter):
    """Create one synthetic conditioning record and advance the ID.

    Args:
        footballer_id: Identifier stored in the record.
        date: Day of the record, forwarded to ``random_time_on_date``.
        id_counter: Value written to the record's ``id`` field.

    Returns:
        tuple: ``(record, id_counter + 1)`` where ``record`` holds ``id``,
        ``footballer_id``, ten independently drawn metrics rounded to two
        decimals, ``created_at`` (``YYYY-MM-DD``) and ``timestamp``.
    """
    def draw(low, high):
        """Uniform draw between ``low`` and ``high`` rounded to two decimals."""
        return round(np.random.uniform(low, high), 2)

    # The timestamp is drawn before the metrics; the dict literal then draws
    # the metrics top to bottom.
    stamp = random_time_on_date(date)
    conditional_data = {
        "id": id_counter,  # id_counter serves as the primary key
        "footballer_id": footballer_id,
        "VO2_max": draw(40, 60),
        "lactate_levels": draw(1, 4),
        "training_intensity": draw(70, 90),
        "recovery_times": draw(12, 24),
        "current_VO2_max": draw(40, 60),
        "current_lactate_levels": draw(1, 4),
        "current_muscle_strength": draw(70, 100),
        "target_VO2_max": draw(50, 65),
        "target_lactate_level": draw(1, 3),
        "target_muscle_strength": draw(75, 105),
        "created_at": stamp.strftime("%Y-%m-%d"),
        "timestamp": stamp,
    }

    next_id = id_counter + 1  # next free ID for the caller
    return conditional_data, next_id

def generate_endurance_data(footballer_id, date, id_counter):
    """Create one synthetic endurance-session record and advance the ID.

    Args:
        footballer_id: Identifier stored in the record.
        date: Day of the session, forwarded to ``random_time_on_date``.
        id_counter: Value written to the record's ``id`` field.

    Returns:
        tuple: ``(record, id_counter + 1)`` where ``record`` holds ``id``,
        ``footballer_id``, six randomly drawn session metrics, ``created_at``
        (``YYYY-MM-DD``) and ``timestamp``.
    """
    session_moment = random_time_on_date(date)

    # Keyword arguments are evaluated left to right, so the RNG draws happen
    # in the same order as the keys appear in the record.
    endurance_data = dict(
        id=id_counter,  # id_counter serves as the primary key
        footballer_id=footballer_id,
        running_distance=round(np.random.uniform(5, 15), 2),
        average_speed=round(np.random.uniform(5, 10), 2),
        heart_rate=np.random.randint(120, 180),  # upper bound is exclusive
        peak_heart_rate=np.random.randint(180, 200),
        training_intensity=round(np.random.uniform(70, 90), 2),
        session=np.random.randint(1, 3),  # yields 1 or 2
        created_at=session_moment.strftime("%Y-%m-%d"),
        timestamp=session_moment,
    )

    following_id = id_counter + 1  # next free ID for the caller
    return endurance_data, following_id

# Seed NumPy's global RNG so that re-running this cell regenerates the same
# files (the first cell of the notebook uses the same seed value).
np.random.seed(42)

# Generate the data: draw one trend profile per footballer first.
trend_data = assign_trends(NUM_FOOTBALLERS)
physical_data = []
conditional_data = []
endurance_data = []

# Every table gets its own primary-key sequence starting at 1. Threading one
# shared counter through all three generators would leave each table with
# only every third ID (1, 4, 7, ... / 2, 5, 8, ... / 3, 6, 9, ...).
physical_id = conditional_id = endurance_id = 1

for footballer_id in range(1, NUM_FOOTBALLERS + 1):
    for date in dates:
        # Each generator returns the record plus the next free ID of its table.
        physical_entry, physical_id = generate_physical_data(footballer_id, date, trend_data, physical_id)
        conditional_entry, conditional_id = generate_conditional_data(footballer_id, date, conditional_id)
        endurance_entry, endurance_id = generate_endurance_data(footballer_id, date, endurance_id)

        physical_data.append(physical_entry)
        conditional_data.append(conditional_entry)
        endurance_data.append(endurance_entry)

# Convert the record lists into DataFrames.
physical_df = pd.DataFrame(physical_data)
conditional_df = pd.DataFrame(conditional_data)
endurance_df = pd.DataFrame(endurance_data)

# Sanity check before writing: one row per footballer per day, and gap-free
# IDs 1..N in every table.
expected_rows = NUM_FOOTBALLERS * len(dates)
for table in (physical_df, conditional_df, endurance_df):
    assert len(table) == expected_rows
    if expected_rows:
        assert table["id"].tolist() == list(range(1, expected_rows + 1))

# Save the tables as CSV files (index column omitted).
physical_df.to_csv('../processed_data/physical.csv', index=False)
conditional_df.to_csv('../processed_data/conditional.csv', index=False)
endurance_df.to_csv('../processed_data/endurance.csv', index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi!")


Veriler başarıyla oluşturuldu ve kaydedildi!


In [2]:
import pandas as pd

# Paths of the generated physical table and the raw footballer roster.
processed_data_path = '../processed_data/physical.csv'
raw_data_path = '../raw_data/footballers.csv'

# Load both tables.
processed_data = pd.read_csv(processed_data_path)
raw_data = pd.read_csv(raw_data_path)

# Make the cell safe to re-run: the result is written back to the same file,
# so a second run would otherwise merge 'height' again and, after renaming,
# leave two columns called 'heights'. Drop any column left by an earlier run.
processed_data = processed_data.drop(columns=['heights'], errors='ignore')

# One height per footballer, already under its final column name. Keeping a
# single row per footballer_id prevents the left merge from multiplying
# measurement rows if the raw file lists a footballer more than once.
heights_by_footballer = (
    raw_data[['footballer_id', 'height']]
    .drop_duplicates(subset='footballer_id')
    .rename(columns={'height': 'heights'})
)

# Attach the heights; footballers missing from the raw file get NaN.
processed_data = processed_data.merge(
    heights_by_footballer,
    on='footballer_id',
    how='left'
)

# Write the result back to the processed file.
processed_data.to_csv(processed_data_path, index=False)

print("Heights sütunu başarıyla eklendi ve dosya kaydedildi!")


Heights sütunu başarıyla eklendi ve dosya kaydedildi!


In [3]:
import pandas as pd

# Location of the physical table; it is read and then overwritten in place.
processed_data_path = '../processed_data/physical.csv'

# Read the table as currently stored on disk.
processed_data = pd.read_csv(filepath_or_buffer=processed_data_path)

# Target column layout: 'heights' is placed right after 'body_fat_percentage'.
columns_order = [
    'id',
    'footballer_id',
    'muscle_mass',
    'muscle_strength',
    'muscle_endurance',
    'flexibility',
    'weight',
    'body_fat_percentage',
    'heights',
    'thigh_circumference',
    'shoulder_circumference',
    'arm_circumference',
    'chest_circumference',
    'back_circumference',
    'waist_circumference',
    'leg_circumference',
    'calf_circumference',
    'created_at',
    'timestamp',
]

# Select the columns in the new order (raises KeyError if one is missing).
processed_data = processed_data.loc[:, columns_order]

# Persist the reordered table.
processed_data.to_csv(path_or_buf=processed_data_path, index=False)

print("Heights sütunu başarıyla taşındı ve dosya kaydedildi!")


Heights sütunu başarıyla taşındı ve dosya kaydedildi!
