# Havayolu Yemek Talebi Tahmini - Keşifsel Veri Analizi (EDA)
**Takım Adı:** Vector_Team

Bu not defteri, Havayolu Yemek Talebi Tahmini projesi için sentetik veri setinin oluşturulmasını ve keşifsel veri analizini (EDA) içermektedir.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Use seaborn's 'whitegrid' theme for the figures drawn in this notebook.
sns.set_theme(style='whitegrid')

# Make sure the 'plots' folder exists; exist_ok=True keeps re-runs of this
# cell from failing when the folder is already there.
os.makedirs(name='plots', exist_ok=True)

## 1. Sentetik Veri Seti Oluşturma
Proje gereksinimlerine uygun olarak sentetik bir veri seti oluşturulacaktır.

In [None]:
def generate_airline_data(n_rows=5000, seed=42):
    """Build a synthetic per-flight dataset for airline meal-demand modelling.

    Each row describes one flight. Passenger counts, the adult/child split,
    the domestic/international flag, the flight duration and the business
    class ratio are drawn at random; ``total_food_demand`` is then derived
    from those features with a fixed, deterministic formula.

    Args:
        n_rows: Number of flights (rows) to generate.
        seed: Value passed to ``np.random.seed`` before any random draw.
            The default of 42 yields the same rows the function produced
            when the seed was hard-coded.

    Returns:
        A ``pandas.DataFrame`` with the columns ``flight_id``,
        ``flight_duration``, ``passenger_count``, ``adult_passengers``,
        ``child_passengers``, ``business_class_ratio``,
        ``is_international`` and ``total_food_demand``.
    """
    # The global NumPy generator is seeded (as before) so that the default
    # call stays reproducible and later code sees the same generator state.
    np.random.seed(seed)

    flight_ids = np.arange(1, n_rows + 1)
    passenger_counts = np.random.randint(50, 301, size=n_rows)

    # Adults are between 70% and 100% of the passengers on each flight.
    # The draw is done one flight at a time, in the original order, so the
    # sequence of random numbers (and therefore the dataset) is unchanged.
    adult_passengers = np.array(
        [np.random.randint(int(count * 0.7), count + 1) for count in passenger_counts]
    )
    child_passengers = passenger_counts - adult_passengers

    # Roughly 20% of flights are international.
    is_international = np.random.choice([0, 1], size=n_rows, p=[0.8, 0.2])

    # International flights last 3-12 hours, domestic ones 1-8 hours;
    # durations are rounded to two decimals. Drawn per flight for the same
    # reproducibility reason as the adult counts above.
    flight_durations = np.array(
        [
            round(np.random.uniform(3, 12) if inter == 1 else np.random.uniform(1, 8), 2)
            for inter in is_international
        ]
    )
    business_class_ratios = np.random.uniform(0, 1.0, size=n_rows)

    # --- Deterministic demand model (vectorized over all flights) ---
    base_meals_per_passenger = 1.0

    # Longer flights need more meals per passenger; the first matching
    # threshold wins, anything of 8 hours or more gets the default 2.0.
    duration_multiplier = np.select(
        [flight_durations < 2, flight_durations < 4, flight_durations < 8],
        [0.8, 1.0, 1.5],
        default=2.0,
    )
    international_bonus = np.where(is_international == 1, 0.3, 0.0)
    business_bonus = business_class_ratios * 0.4
    # passenger_counts is always >= 50, so this division is safe.
    child_reduction = (child_passengers / passenger_counts) * 0.15

    food_per_passenger = base_meals_per_passenger * duration_multiplier * (
        1 + international_bonus + business_bonus - child_reduction
    )

    # np.rint rounds halves to the nearest even integer, matching the
    # built-in round() used by the earlier row-by-row implementation.
    rounded_demand = np.rint(passenger_counts * food_per_passenger).astype(np.int64)
    # Never plan for fewer than half a meal per passenger (truncated).
    minimum_demand = (passenger_counts * 0.5).astype(np.int64)
    total_food_demand = np.maximum(rounded_demand, minimum_demand)

    return pd.DataFrame({
        'flight_id': flight_ids,
        'flight_duration': flight_durations,
        'passenger_count': passenger_counts,
        'adult_passengers': adult_passengers,
        'child_passengers': child_passengers,
        'business_class_ratio': business_class_ratios,
        'is_international': is_international,
        'total_food_demand': total_food_demand,
    })

# Generate 5000 synthetic flights and persist them as CSV; index=False keeps
# the DataFrame's row index out of the file.
df = generate_airline_data(n_rows=5000)
df.to_csv(path_or_buf='Vector_Team_dataset.csv', index=False)
print('Dataset generated successfully: Vector_Team_dataset.csv')

## 2. Keşifsel Veri Analizi (EDA)

In [None]:
# Reload the dataset from the CSV written earlier, then print summary
# statistics followed by the number of missing values per column.
df = pd.read_csv('Vector_Team_dataset.csv')
print(df.describe())
print(df.isnull().sum())

# Heatmap of pairwise correlations between every column except flight_id.
plt.figure(figsize=(10, 8))
feature_correlations = df.drop(columns='flight_id').corr()
sns.heatmap(feature_correlations, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Correlation Heatmap')
plt.show()

# Two side-by-side scatter plots of total_food_demand, one per feature,
# with points coloured by the is_international flag.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
scatter_panels = [
    ('passenger_count', 'Passenger Count vs Total Food Demand'),
    ('flight_duration', 'Flight Duration vs Total Food Demand'),
]
for panel_axis, (x_column, panel_title) in zip(axes, scatter_panels):
    sns.scatterplot(
        data=df,
        x=x_column,
        y='total_food_demand',
        hue='is_international',
        alpha=0.5,
        ax=panel_axis,
    )
    panel_axis.set_title(panel_title)
plt.show()