In [6]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# 1. Load the input tables
footballers = pd.read_csv('../raw_data/footballers.csv')
football_teams = pd.read_csv('../processed_data/football_teams_cleaned.csv')
leagues = pd.read_csv('../raw_data/leagues.csv')

# 2. Join the tables on their shared key columns
# No `how` is passed, so both joins use pandas' default (inner join):
# rows without a match on every listed key are dropped.
teams_with_leagues = football_teams.merge(
    leagues,
    on=['league_id', 'league_name'],
)
merged_data = footballers.merge(
    teams_with_leagues,
    on=['team_id', 'league_id', 'league_name'],
)

# 3. Columns shared by every generated dataset
common_columns = [
    'footballer_id', 'footballer_name', 'league_id', 'league_name', 'league_logo_path',
    'team_id', 'club', 'img_path', 'birthday', 'nationality_img_path',
    'footballer_img_path'
]

# 4. Generate the 'created_at' and 'timestamp' columns
num_records = len(merged_data)

# Seed before the first random draw so the created_at day offsets are
# repeatable across runs; without this they come from an unseeded generator.
np.random.seed(42)

# Read the clock once so every row is stamped relative to the same instant
# (calling datetime.now() per row lets values drift across a second boundary).
generation_time = datetime.now()

# created_at: generation time minus a random whole number of days in [1, 999].
# .tolist() yields plain Python ints, which timedelta() requires.
day_offsets = np.random.randint(1, 1000, num_records).tolist()
created_at = [generation_time - timedelta(days=days) for days in day_offsets]

# timestamp: the generation time itself (not random), formatted as text.
timestamp = [generation_time.strftime('%Y-%m-%d %H:%M:%S')] * num_records

# 5. Assemble the shared base frame
common_data = merged_data[common_columns].copy()
common_data['created_at'] = created_at
common_data['timestamp'] = timestamp

# 6. Add synthetic data

# Physical data
np.random.seed(42)  # fixed seed for reproducibility
physical_data = common_data.copy()

# (column, low, high) bounds passed to np.random.uniform.
# The order matters: each draw advances the seeded generator, and it also
# determines the column order of the resulting frame.
physical_ranges = [
    ('muscle_mass', 50, 100),              # kg
    ('muscle_strength', 200, 500),         # kg
    ('muscle_endurance', 1, 10),           # level
    ('flexibility', 1, 10),                # level
    ('weight', 60, 100),                   # kg
    ('body_fat_percentage', 10, 25),       # %
    ('heights', 1.6, 2.1),                 # meters
    ('thigh_circumference', 50, 70),       # cm
    ('shoulder_circumference', 100, 140),  # cm
    ('arm_circumference', 30, 50),         # cm
    ('chest_circumference', 80, 120),      # cm
    ('back_circumference', 80, 110),       # cm
    ('waist_circumference', 60, 100),      # cm
    ('leg_circumference', 40, 60),         # cm
    ('calf_circumference', 30, 50),        # cm
]
for column, low, high in physical_ranges:
    physical_data[column] = np.random.uniform(low, high, num_records)

# Conditional data
conditional_data = common_data.copy()

# Reuse the muscle_strength column already generated for the physical dataset
conditional_data['muscle_strength'] = physical_data['muscle_strength']

# (column, sampler, low, high); samplers are called as sampler(low, high, num_records).
# The order matters: each draw advances the shared random generator.
conditional_specs = [
    ('VO2_max', np.random.uniform, 40, 70),                    # ml/kg/min
    ('lactate_levels', np.random.uniform, 1, 5),               # mmol/L
    ('training_intensity', np.random.uniform, 1, 10),          # level
    # NOTE(review): randint's upper bound is exclusive, so 72 is never
    # produced (values are 24..71 hours) -- confirm this is intended.
    ('recovery_times', np.random.randint, 24, 72),             # hours
    ('current_VO2_max', np.random.uniform, 30, 60),            # ml/kg/min
    ('current_lactate_levels', np.random.uniform, 1, 5),       # mmol/L
    ('current_muscle_strength', np.random.uniform, 200, 500),  # kg
]
for column, sampler, low, high in conditional_specs:
    conditional_data[column] = sampler(low, high, num_records)

# Targets: VO2_max and muscle_strength targets are the generated baseline
# plus a random positive increment; the lactate target is drawn independently.
vo2_increment = np.random.uniform(1, 5, num_records)
conditional_data['target_VO2_max'] = conditional_data['VO2_max'] + vo2_increment
conditional_data['target_lactate_level'] = np.random.uniform(1, 3, num_records)
strength_increment = np.random.uniform(5, 50, num_records)
conditional_data['target_muscle_strength'] = conditional_data['muscle_strength'] + strength_increment

# Endurance data
endurance_data = common_data.copy()

# (column, sampler, low, high); samplers are called as sampler(low, high, num_records).
# The order matters: each draw advances the shared random generator.
# NOTE(review): randint's upper bound is exclusive, so zones_x is 1..4 and
# session is 1..99 -- confirm these ranges are intended.
endurance_specs = [
    ('running_distance', np.random.uniform, 3, 15),    # km
    ('average_speed', np.random.uniform, 5, 15),       # km/h
    ('heart_rate', np.random.uniform, 60, 200),        # bpm
    ('zones_x', np.random.randint, 1, 5),              # zones
    ('training_intensity', np.random.uniform, 1, 10),  # level
    ('session', np.random.randint, 1, 100),            # session ID
]
for column, sampler, low, high in endurance_specs:
    endurance_data[column] = sampler(low, high, num_records)

# 7. Save the datasets
# Maps each output path to its frame; written in this order, without the index column.
output_files = {
    '../processed_data/physical.csv': physical_data,
    '../processed_data/conditional.csv': conditional_data,
    '../processed_data/endurance.csv': endurance_data,
}
for csv_path, frame in output_files.items():
    frame.to_csv(csv_path, index=False)

print("Veriler başarıyla oluşturuldu ve kaydedildi!")


Veriler başarıyla oluşturuldu ve kaydedildi!
