# 🟩 2-GURUH: Aholi So'rovi Tahlili

**Mavzu:** Sample Olish Usullari va Populyatsiya Xususiyatlari

**Maqsad:** Shahar aholisining demografik ma'lumotlarini tahlil qilish orqali turli sample olish usullarini o'rganish va populyatsiya parametrlarini baholash

---

## 📋 Loyiha Tafsiloti

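Sizda Toshkent shahrining 6 ta tumanidagi aholi ma'lumotlari bor (ma'lumotlar haqiqiy emas — quyidagi kod katagida `np.random` yordamida sun'iy ravishda yaratiladi). Bu ma'lumotlarni tahlil qilib, quyidagilarni aniqlashingiz kerak: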

### 🎯 Asosiy Vazifalar:
1. **Populyatsiya va Sample** farqini amalda ko'rsatish
2. **Sample olish usullari** samaradorligini taqqoslash
3. **Sample hajmi** ta'sirini o'rganish
4. **Tumanlar bo'yicha** demografik farqlarni aniqlash
5. **Populyatsiya parametrlari** bahosi va ishonch intervallari

---

In [None]:
# Kutubxonalarni import qilish va ma'lumotlar tayyorlash
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Generate the synthetic survey data.
# The global NumPy RNG is seeded once; every np.random call below consumes that
# stream in order, so reordering the dictionary entries would change the data.
np.random.seed(2024)

# Tashkent districts and the category labels used for the categorical columns
districts = ['Chilonzor', 'Yunusobod', 'Shayxontohur', 'Mirzo Ulug\'bek', 'Yakkasaroy', 'Olmazor']
education_levels = ['Oliy', 'O\'rta maxsus', 'O\'rta', 'Boshlang\'ich']
employment_status = ['Ishlamoqda', 'Ishsiz', 'Nafaqada', 'O\'qimoqda']

n_people = 2000  # 2000 people
# One entry per column; each array has n_people elements.
# The p=[...] lists are the category probabilities passed to np.random.choice.
data = {
    'ID': range(1, n_people + 1),
    'Tuman': np.random.choice(districts, n_people, p=[0.18, 0.17, 0.16, 0.17, 0.16, 0.16]),
    'Yosh': np.random.normal(35, 15, n_people).astype(int),
    'Jins': np.random.choice(['Erkak', 'Ayol'], n_people, p=[0.49, 0.51]),
    'Ta\'lim': np.random.choice(education_levels, n_people, p=[0.35, 0.25, 0.30, 0.10]),
    'Ish_Holati': np.random.choice(employment_status, n_people, p=[0.60, 0.15, 0.15, 0.10]),
    'Oylik_Daromad': np.random.lognormal(13.5, 0.6, n_people).astype(int),  # log-normal distribution
    'Oila_A\'zolari': np.random.poisson(3.5, n_people),
    'Uy_Turi': np.random.choice(['Shaxsiy uy', 'Kvartira', 'Ijara'], n_people, p=[0.30, 0.50, 0.20]),
    'Transport': np.random.choice(['Avtomobil', 'Jamoat transport', 'Piyoda'], n_people, p=[0.35, 0.55, 0.10])
}

# Build the DataFrame and clamp the numeric columns to fixed ranges.
# Values outside a range are replaced by the nearest bound, not dropped.
df = pd.DataFrame(data)
df['Yosh'] = np.clip(df['Yosh'], 16, 80)
df['Oylik_Daromad'] = np.clip(df['Oylik_Daromad'], 500000, 20000000)  # 500 thousand - 20 million so'm
df['Oila_A\'zolari'] = np.clip(df['Oila_A\'zolari'], 1, 10)

# Print a short overview of the generated data set: size, number of districts,
# observed age / income ranges, the first ten rows and describe() output.
age_lo, age_hi = df['Yosh'].min(), df['Yosh'].max()
income_lo, income_hi = df['Oylik_Daromad'].min(), df['Oylik_Daromad'].max()

print("🏙️ TOSHKENT SHAHRI AHOLI SO'ROVI MA'LUMOTLARI")
print("=" * 60)
print(f"Jami kuzatuvlar: {len(df):,} kishi")
print(f"Tumanlar soni: {df['Tuman'].nunique()}")
print(f"Yosh oralig'i: {age_lo}-{age_hi} yil")
print(f"Daromad oralig'i: {income_lo:,}-{income_hi:,} so'm")

print("\n📊 Ma'lumotlar ko'rinishi:")
first_rows = df.head(10)
print(first_rows)

print("\n📈 Asosiy statistikalar:")
summary_table = df.describe()
print(summary_table)

## 1️⃣ Vazifa: Populyatsiya Parametrlari vs Sample Statistiklari

### Aholining asosiy xususiyatlarini populyatsiya va sample nuqtai nazaridan tahlil qiling:

In [None]:
# TODO: Task 1 - population parameters
print("🌍 1-VAZIFA: POPULYATSIYA PARAMETRLARI vs SAMPLE STATISTIKLARI")
print("="*60)

# "Population" parameters: the full data set is treated as the whole population.
print("🏛️ POPULYATSIYA PARAMETRLARI (μ, σ, N):")

# pandas' Series.std() defaults to ddof=1 (the sample estimator s). These values
# are reported as the population σ, so the N-denominator form (ddof=0) is used.
yosh_mu = df['Yosh'].mean()
yosh_sigma = df['Yosh'].std(ddof=0)
daromad_mu = df['Oylik_Daromad'].mean()
daromad_sigma = df['Oylik_Daromad'].std(ddof=0)
N = len(df)

print(f"• Aholi soni (N): {N:,} kishi")
print(f"• O'rtacha yosh (μ): {yosh_mu:.2f} yil")
print(f"• Yosh standart og'ish (σ): {yosh_sigma:.2f} yil")
print(f"• O'rtacha daromad (μ): {daromad_mu:,.0f} so'm")
print(f"• Daromad standart og'ish (σ): {daromad_sigma:,.0f} so'm")

# Population composition (categorical variables), reported as percentages.
gender_dist = df['Jins'].value_counts(normalize=True).mul(100)
edu_dist = df["Ta'lim"].value_counts(normalize=True).mul(100)

print("\n📊 AHOLI TARKIBI:")
for heading, shares in (("• Jins taqsimoti:", gender_dist),
                        ("• Ta'lim darajasi:", edu_dist)):
    print(heading)
    for label, share in shares.items():
        print(f"  - {label}: {share:.1f}%")

# Draw one simple random sample and compare its statistics with the
# population values computed earlier (yosh_mu, daromad_mu).
sample_size = 200
sample_data = df.sample(n=sample_size, random_state=42)

sample_ages = sample_data['Yosh']
sample_incomes = sample_data['Oylik_Daromad']
sample_yosh_mean, sample_yosh_std = sample_ages.mean(), sample_ages.std()
sample_daromad_mean, sample_daromad_std = sample_incomes.mean(), sample_incomes.std()

print(f"\n📊 SAMPLE STATISTIKLARI (x̄, s, n = {sample_size}):")
print(f"• Sample hajmi (n): {len(sample_data)} kishi")
print(f"• Sample o'rtacha yosh (x̄): {sample_yosh_mean:.2f} yil")
print(f"• Sample yosh standart og'ish (s): {sample_yosh_std:.2f} yil")
print(f"• Sample o'rtacha daromad (x̄): {sample_daromad_mean:,.0f} so'm")
print(f"• Sample daromad standart og'ish (s): {sample_daromad_std:,.0f} so'm")

# Absolute estimation errors, also expressed relative to the population mean.
yosh_error = abs(yosh_mu - sample_yosh_mean)
daromad_error = abs(daromad_mu - sample_daromad_mean)
yosh_error_pct = yosh_error / yosh_mu * 100
daromad_error_pct = daromad_error / daromad_mu * 100

print("\n🎯 PARAMETR vs STATISTIK TAQQOSLASH:")
print(f"• Yosh xatoligi: {yosh_error:.2f} yil ({yosh_error_pct:.1f}%)")
print(f"• Daromad xatoligi: {daromad_error:,.0f} so'm ({daromad_error_pct:.1f}%)")

# TODO: your analysis and conclusions
# Placeholder prompts: each question is printed followed by a "Javob: ..." line
# that the student is expected to replace with an answer.
print(f"\n💡 SIZNING TAHLIL:")
print("1. Sample statistiklari populyatsiya parametrlarini qanchalik yaxshi aks ettiradi?")
print("Javob: ...")

print("\n2. Qaysi o'zgaruvchi uchun sample eng yaxshi natija berdi?")
print("Javob: ...")

print("\n3. Bu ma'lumotlar haqiqiy populyatsiyani qanchalik ifodalaydi?")
print("Javob: ...")

## 2️⃣ Vazifa: Sample Olish Usullarini Taqqoslash

### 4 xil sample olish usulini qo'llab, ularning samaradorligini baholang:

In [None]:
# TODO: Task 2 - compare sampling methods
print("🎲 2-VAZIFA: SAMPLE OLISH USULLARINI TAQQOSLASH")
print("="*60)

# Target sample size shared by the sampling calls below; cluster_sample() reads
# this module-level name directly instead of taking it as an argument.
sample_size = 150

# 1. Simple random sampling
def simple_random_sample(data, n):
    """Return ``n`` rows of ``data`` drawn with ``DataFrame.sample``.

    The draw uses ``random_state=42``, so repeated calls with the same
    arguments return the same rows.
    """
    drawn_rows = data.sample(n=n, random_state=42)
    return drawn_rows

# 2. Stratified sampling - by district
def stratified_sample_by_district(data, n):
    """Draw a proportionally allocated stratified sample with one stratum per district.

    Each distinct value of the ``'Tuman'`` column is a stratum. Stratum sizes
    are ``n * stratum_count / total`` rounded with the largest-remainder method,
    so the sizes add up to exactly ``n``. Truncating each quota with ``int()``
    used to return fewer than ``n`` rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Tuman'`` column.
    n : int
        Requested total sample size. Values larger than the number of rows
        with a district are capped at that number.

    Returns
    -------
    pandas.DataFrame
        The sampled rows with a fresh 0..k-1 index. Empty (same columns) when
        ``n <= 0`` or ``data`` has no rows with a district.
    """
    district_counts = data['Tuman'].value_counts()
    counts = district_counts.values
    total = int(counts.sum())
    if total == 0 or n <= 0:
        return data.iloc[0:0].reset_index(drop=True)
    n = min(int(n), total)

    # Integer arithmetic keeps the quotas exact (no floating-point drift).
    allocation = counts * n // total
    remainders = counts * n % total
    # Give the rows lost to flooring to the strata with the largest remainders;
    # the stable sort breaks ties in favour of the larger stratum.
    shortfall = n - int(allocation.sum())
    by_remainder = np.argsort(-remainders, kind='stable')
    allocation[by_remainder[:shortfall]] += 1

    sample_list = []
    for district, district_sample_size in zip(district_counts.index, allocation):
        if district_sample_size == 0:
            continue
        district_data = data[data['Tuman'] == district]
        take = min(int(district_sample_size), len(district_data))
        sample_list.append(district_data.sample(n=take, random_state=42))

    return pd.concat(sample_list, ignore_index=True)

# 3. Stratified sampling - by age group
def stratified_sample_by_age(data, n):
    """Draw a proportionally allocated stratified sample with one stratum per age group.

    Ages are binned into (15, 25], (25, 35], (35, 45], (45, 55], (55, 80] on a
    copy of ``data``; the input frame is not modified. Stratum sizes are
    ``n * stratum_count / total`` rounded with the largest-remainder method, so
    they add up to exactly ``n``. Truncating each quota with ``int()`` used to
    return fewer than ``n`` rows.

    Rows whose ``'Yosh'`` falls outside (15, 80] belong to no bin and are never
    sampled; proportions are computed over the binned rows only.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric ``'Yosh'`` column.
    n : int
        Requested total sample size, capped at the number of binned rows.

    Returns
    -------
    pandas.DataFrame
        The sampled rows (without the helper age-group column) with a fresh
        0..k-1 index. Empty when ``n <= 0`` or no row falls into a bin.
    """
    # Build the age groups on a copy so the caller's frame stays untouched.
    data_copy = data.copy()
    data_copy['Yosh_Guruh'] = pd.cut(data_copy['Yosh'],
                                     bins=[15, 25, 35, 45, 55, 80],
                                     labels=['16-25', '26-35', '36-45', '46-55', '56+'])

    age_counts = data_copy['Yosh_Guruh'].value_counts()
    counts = age_counts.values
    total = int(counts.sum())
    if total == 0 or n <= 0:
        return data.iloc[0:0].reset_index(drop=True)
    n = min(int(n), total)

    # Integer arithmetic keeps the quotas exact (no floating-point drift).
    allocation = counts * n // total
    remainders = counts * n % total
    # Give the rows lost to flooring to the strata with the largest remainders.
    shortfall = n - int(allocation.sum())
    by_remainder = np.argsort(-remainders, kind='stable')
    allocation[by_remainder[:shortfall]] += 1

    sample_list = []
    for age_group, age_sample_size in zip(age_counts.index, allocation):
        if age_sample_size == 0:
            continue  # empty or too-small group gets no rows
        age_data = data_copy[data_copy['Yosh_Guruh'] == age_group]
        take = min(int(age_sample_size), len(age_data))
        sample_list.append(age_data.sample(n=take, random_state=42))

    return pd.concat(sample_list, ignore_index=True).drop('Yosh_Guruh', axis=1)

# 4. Cluster sampling - take whole districts as clusters
def cluster_sample(data, n_clusters=2, target_size=None):
    """Pick ``n_clusters`` districts at random and return their rows.

    Districts are chosen from ``data`` itself. The previous version picked them
    from the global ``df``, so calling it on any other frame could select
    districts that the frame does not contain.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Tuman'`` column.
    n_clusters : int, default 2
        Number of districts to select without replacement.
        ``np.random.choice`` raises ``ValueError`` if this exceeds the number
        of distinct districts in ``data``.
    target_size : int, optional
        Reference sample size used to thin an oversized result. Defaults to
        the notebook-level ``sample_size`` variable, as before.

    Returns
    -------
    pandas.DataFrame
        All rows of the selected districts, or, when that is more than
        ``1.5 * target_size`` rows, a random subset of
        ``int(1.2 * target_size)`` of them (``random_state=42``).

    Notes
    -----
    District selection uses the global NumPy RNG, so the outcome depends on the
    RNG state at call time.
    """
    if target_size is None:
        target_size = sample_size  # notebook-level default, kept for existing callers

    selected_districts = np.random.choice(data['Tuman'].unique(), n_clusters, replace=False)
    cluster_data = data[data['Tuman'].isin(selected_districts)]
    # If there are far too many rows, thin them out at random.
    if len(cluster_data) > target_size * 1.5:
        cluster_data = cluster_data.sample(n=int(target_size * 1.2), random_state=42)
    return cluster_data

# Draw one sample per method. The call order is kept as-is: cluster_sample()
# consumes the global NumPy RNG, the other three use a fixed random_state.
simple_sample = simple_random_sample(df, sample_size)
stratified_district = stratified_sample_by_district(df, sample_size)
stratified_age = stratified_sample_by_age(df, sample_size)
cluster_sample_data = cluster_sample(df, 2)

# Display name -> sampled frame, in the order used by the reports and plots below.
samples = dict([
    ('Oddiy tasodifiy', simple_sample),
    ('Qatlamli (tuman)', stratified_district),
    ('Qatlamli (yosh)', stratified_age),
    ('Klaster', cluster_sample_data),
])

print("📊 SAMPLE HAJMLARI:")
for method_name, method_sample in samples.items():
    row_count = len(method_sample)
    print(f"• {method_name}: {row_count} kishi")

# Compare each method's mean age with the population mean.
overall_age_mean = df['Yosh'].mean()
print("\n📈 YOSH O'RTACHASI TAQQOSLASH:")
print(f"• Populyatsiya: {overall_age_mean:.2f} yil")

for method_name, method_sample in samples.items():
    method_age_mean = method_sample['Yosh'].mean()
    age_gap = abs(method_age_mean - overall_age_mean)
    print(f"• {method_name}: {method_age_mean:.2f} yil (xatolik: {age_gap:.2f} yil)")

# Compare each method's mean income with the population mean (relative error).
overall_income_mean = df['Oylik_Daromad'].mean()
print("\n💰 DAROMAD O'RTACHASI TAQQOSLASH:")
print(f"• Populyatsiya: {overall_income_mean:,.0f} so'm")

for method_name, method_sample in samples.items():
    method_income_mean = method_sample['Oylik_Daromad'].mean()
    income_gap = abs(method_income_mean - overall_income_mean)
    income_gap_pct = (income_gap / overall_income_mean) * 100
    print(f"• {method_name}: {method_income_mean:,.0f} so'm (xatolik: {income_gap_pct:.1f}%)")

# Compare district distributions: share of each district in the population and
# in the district-stratified sample, both sorted by district name.
# pop_district_dist and strat_district_dist are reused by the bar chart below.
print(f"\n🏘️ TUMAN TAQSIMOTI TAQQOSLASH:")
print("Populyatsiya taqsimoti:")
pop_district_dist = df['Tuman'].value_counts(normalize=True).sort_index()
print(pop_district_dist.round(3))

print(f"\nQatlamli (tuman) sample taqsimoti:")
strat_district_dist = stratified_district['Tuman'].value_counts(normalize=True).sort_index()
print(strat_district_dist.round(3))

# TODO: evaluate the sampling methods
# Placeholder prompts for the student to fill in.
print(f"\n🎯 SIZNING BAHOLASH:")
print("1. Qaysi sample usuli eng yaxshi natija berdi?")
print("Javob: ...")

print("\n2. Har bir usulning afzalliklari:")
print("• Oddiy tasodifiy: ...")
print("• Qatlamli (tuman): ...")
print("• Qatlamli (yosh): ...")
print("• Klaster: ...")

print("\n3. Qaysi holatda qaysi usulni ishlatish kerak?")
print("Javob: ...")

# Visualization: 2x2 grid comparing the population with the samples.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 1. Age distribution: population vs the simple random sample.
# NOTE(review): both histograms plot raw counts with different bin counts
# (20 vs 15), so the 150-row sample is drawn much lower than the 2000-row
# population; consider density=True if the shapes are meant to be compared.
axes[0,0].hist(df['Yosh'], bins=20, alpha=0.5, label='Populyatsiya', color='blue')
axes[0,0].hist(simple_sample['Yosh'], bins=15, alpha=0.7, label='Oddiy tasodifiy', color='red')
axes[0,0].set_title('Yosh Taqsimoti: Populyatsiya vs Sample')
axes[0,0].set_xlabel('Yosh')
axes[0,0].set_ylabel('Chastota')
axes[0,0].legend()

# 2. District shares: population vs district-stratified sample.
# fillna(0) covers a district that is missing from one of the two series.
district_comparison = pd.DataFrame({
    'Populyatsiya': pop_district_dist,
    'Qatlamli_sample': strat_district_dist
}).fillna(0)
district_comparison.plot(kind='bar', ax=axes[0,1])
axes[0,1].set_title('Tuman Taqsimoti Taqqoslash')
axes[0,1].set_ylabel('Nisbat')
axes[0,1].tick_params(axis='x', rotation=45)

# 3. Mean age per sampling method; the dashed line marks the population mean.
methods = list(samples.keys())
age_means = [samples[method]['Yosh'].mean() for method in methods]
axes[1,0].bar(methods, age_means, color=['blue', 'green', 'orange', 'red'])
axes[1,0].axhline(y=df['Yosh'].mean(), color='black', linestyle='--', label='Populyatsiya')
axes[1,0].set_title('Yosh O\'rtachasi: Sample Usullari')
axes[1,0].set_ylabel('O\'rtacha Yosh')
axes[1,0].tick_params(axis='x', rotation=45)
axes[1,0].legend()

# 4. Mean income per sampling method; the dashed line marks the population mean.
income_means = [samples[method]['Oylik_Daromad'].mean() for method in methods]
axes[1,1].bar(methods, income_means, color=['blue', 'green', 'orange', 'red'])
axes[1,1].axhline(y=df['Oylik_Daromad'].mean(), color='black', linestyle='--', label='Populyatsiya')
axes[1,1].set_title('Daromad O\'rtachasi: Sample Usullari')
axes[1,1].set_ylabel('O\'rtacha Daromad (so\'m)')
axes[1,1].tick_params(axis='x', rotation=45)
axes[1,1].legend()

plt.tight_layout()
plt.show()

## 3️⃣ Vazifa: Sample Hajmining Ta'sirini O'rganish

### Turli sample hajmlari bilan tajriba o'tkazing va Central Limit Theorem ni kuzating:

In [None]:
# TODO: Task 3 - effect of sample size and the Central Limit Theorem
print("📏 3-VAZIFA: SAMPLE HAJMI TA'SIRI va CENTRAL LIMIT THEOREM")
print("="*70)

# Population parameters. The whole frame is the population here, so σ uses the
# N denominator (ddof=0); pandas' default ddof=1 is the sample estimator.
pop_age_mean = df['Yosh'].mean()
pop_age_std = df['Yosh'].std(ddof=0)
pop_income_mean = df['Oylik_Daromad'].mean()
pop_income_std = df['Oylik_Daromad'].std(ddof=0)

print(f"Populyatsiya parametrlari:")
print(f"• Yosh: μ = {pop_age_mean:.2f}, σ = {pop_age_std:.2f}")
print(f"• Daromad: μ = {pop_income_mean:,.0f}, σ = {pop_income_std:,.0f}")

# Sample sizes to try, and the number of repeated draws per size
sample_sizes = [10, 20, 50, 100, 200, 500]
n_experiments = 100
population_size = len(df)

# For every sample size run n_experiments independent draws and summarise
# how the sample means are distributed.
results = []

for size in sample_sizes:
    age_means = []
    income_means = []

    for i in range(n_experiments):
        # The experiment index is the seed, so every run is reproducible.
        sample = df.sample(n=size, random_state=i)
        age_means.append(sample['Yosh'].mean())
        income_means.append(sample['Oylik_Daromad'].mean())

    # Statistics of the sample means (empirical standard error)
    age_mean_of_means = np.mean(age_means)
    age_std_of_means = np.std(age_means)
    income_mean_of_means = np.mean(income_means)
    income_std_of_means = np.std(income_means)

    # Theoretical standard error. df.sample() draws WITHOUT replacement from a
    # finite population, so σ/√n is multiplied by the finite-population
    # correction sqrt((N - n) / (N - 1)). Without it the theoretical value is
    # visibly too large once n is a sizeable share of N (n=500 of N=2000).
    fpc = np.sqrt((population_size - size) / (population_size - 1))
    theoretical_se_age = pop_age_std / np.sqrt(size) * fpc
    theoretical_se_income = pop_income_std / np.sqrt(size) * fpc

    results.append({
        'Sample_Size': size,
        'Age_Mean_of_Means': age_mean_of_means,
        'Age_Std_of_Means': age_std_of_means,
        'Age_Theoretical_SE': theoretical_se_age,
        'Age_Error': abs(age_mean_of_means - pop_age_mean),
        'Income_Mean_of_Means': income_mean_of_means,
        'Income_Std_of_Means': income_std_of_means,
        'Income_Theoretical_SE': theoretical_se_income,
        'Income_Error': abs(income_mean_of_means - pop_income_mean)
    })

results_df = pd.DataFrame(results)
print(f"\n📊 SAMPLE HAJMI TA'SIRI NATIJALARI:")
print(results_df.round(2))

# Central Limit Theorem demonstration: for a few sample sizes, draw 100 samples
# (seeds 0..99) and test whether the sample means look normally distributed.
print(f"\n🎯 CENTRAL LIMIT THEOREM TASDIQ:")
population_size = len(df)
for size in [30, 100, 200]:
    sample_means = [df.sample(n=size, random_state=i)['Yosh'].mean() for i in range(100)]

    # Shapiro-Wilk normality test; p > 0.05 means normality is not rejected.
    shapiro_stat, shapiro_p = stats.shapiro(sample_means)
    is_normal = shapiro_p > 0.05
    # Built outside the f-string: a backslash inside an f-string expression
    # ('Yo\'q') is a SyntaxError on Python versions before 3.12.
    normal_label = "Ha" if is_normal else "Yo'q"

    # Samples are drawn without replacement, so the theoretical SE includes
    # the finite-population correction.
    fpc = np.sqrt((population_size - size) / (population_size - 1))
    theoretical_se = pop_age_std / np.sqrt(size) * fpc

    print(f"Sample hajmi {size}:")
    print(f"  • Sample o'rtachalarining o'rtachasi: {np.mean(sample_means):.2f}")
    print(f"  • Standard Error (amaliy): {np.std(sample_means):.2f}")
    print(f"  • Standard Error (nazariy): {theoretical_se:.2f}")
    print(f"  • Normal taqsimot? {normal_label} (p = {shapiro_p:.3f})")

# TODO: conclusions about the Central Limit Theorem
# Placeholder prompts for the student to fill in.
print(f"\n💡 CENTRAL LIMIT THEOREM HAQIDA SIZNING XULOSALAR:")
print("1. Sample hajmi oshganda sample o'rtachalarining tarqalishi qanday o'zgaradi?")
print("Javob: ...")

print("\n2. Amaliy va nazariy Standard Error mos keladimi?")
print("Javob: ...")

print("\n3. Sample o'rtachalari normal taqsimotga yaqinlashadimi?")
print("Javob: ...")

print("\n4. Minimal sample hajmi tavsiyangiz?")
print("Javob: ...")

# Visualization: 2x3 grid. Top row is age, bottom row is income; the right-hand
# column shows histograms of sample means for n=30 and n=100.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# 1. Sample size vs error of the mean of sample means (age)
axes[0,0].plot(results_df['Sample_Size'], results_df['Age_Error'], 'o-', color='blue')
axes[0,0].set_title('Sample Hajmi vs Yosh Xatoligi')
axes[0,0].set_xlabel('Sample Hajmi')
axes[0,0].set_ylabel('O\'rtacha Xatolik (yil)')
axes[0,0].grid(True)

# 2. Empirical vs theoretical standard error (age)
axes[0,1].plot(results_df['Sample_Size'], results_df['Age_Std_of_Means'], 'o-', color='red', label='Amaliy')
axes[0,1].plot(results_df['Sample_Size'], results_df['Age_Theoretical_SE'], 's--', color='green', label='Nazariy')
axes[0,1].set_title('Standard Error Taqqoslash (Yosh)')
axes[0,1].set_xlabel('Sample Hajmi')
axes[0,1].set_ylabel('Standard Error')
axes[0,1].legend()
axes[0,1].grid(True)

# 3. Distribution of sample means (n=30): 100 samples drawn with seeds 0..99;
# the dashed line marks the population mean.
sample_means_30 = [df.sample(n=30, random_state=i)['Yosh'].mean() for i in range(100)]
axes[0,2].hist(sample_means_30, bins=15, alpha=0.7, color='skyblue', edgecolor='black')
axes[0,2].axvline(pop_age_mean, color='red', linestyle='--', label=f'μ = {pop_age_mean:.1f}')
axes[0,2].set_title('Sample O\'rtachalar Taqsimoti (n=30)')
axes[0,2].set_xlabel('Sample O\'rtachasi')
axes[0,2].set_ylabel('Chastota')
axes[0,2].legend()

# 4. Sample size vs error of the mean of sample means (income)
axes[1,0].plot(results_df['Sample_Size'], results_df['Income_Error'], 'o-', color='purple')
axes[1,0].set_title('Sample Hajmi vs Daromad Xatoligi')
axes[1,0].set_xlabel('Sample Hajmi')
axes[1,0].set_ylabel('O\'rtacha Xatolik (so\'m)')
axes[1,0].grid(True)

# 5. Empirical vs theoretical standard error (income)
axes[1,1].plot(results_df['Sample_Size'], results_df['Income_Std_of_Means'], 'o-', color='orange', label='Amaliy')
axes[1,1].plot(results_df['Sample_Size'], results_df['Income_Theoretical_SE'], 's--', color='brown', label='Nazariy')
axes[1,1].set_title('Standard Error Taqqoslash (Daromad)')
axes[1,1].set_xlabel('Sample Hajmi')
axes[1,1].set_ylabel('Standard Error')
axes[1,1].legend()
axes[1,1].grid(True)

# 6. Distribution of sample means (n=100), built the same way as panel 3
sample_means_100 = [df.sample(n=100, random_state=i)['Yosh'].mean() for i in range(100)]
axes[1,2].hist(sample_means_100, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
axes[1,2].axvline(pop_age_mean, color='red', linestyle='--', label=f'μ = {pop_age_mean:.1f}')
axes[1,2].set_title('Sample O\'rtachalar Taqsimoti (n=100)')
axes[1,2].set_xlabel('Sample O\'rtachasi')
axes[1,2].set_ylabel('Chastota')
axes[1,2].legend()

plt.tight_layout()
plt.show()

## 4️⃣ Vazifa: Tumanlar Bo'yicha Demografik Tahlil

### Har bir tuman uchun aholining xususiyatlarini tahlil qiling va farqlarni aniqlang:

In [None]:
# TODO: Task 4 - demographic analysis by district
print("🏘️ 4-VAZIFA: TUMANLAR BO'YICHA DEMOGRAFIK TAHLIL")
print("=" * 60)

# Aggregations per district: age (count/mean/median/std), income
# (mean/median/std) and household size (mean/std).
aggregations = {
    'Yosh': ['count', 'mean', 'median', 'std'],
    'Oylik_Daromad': ['mean', 'median', 'std'],
    "Oila_A'zolari": ['mean', 'std'],
}

# Flat column names, in the same order as the aggregations above.
flat_column_names = [
    'Aholi_Soni', 'Yosh_Ortacha', 'Yosh_Mediana', 'Yosh_Std',
    'Daromad_Ortacha', 'Daromad_Mediana', 'Daromad_Std',
    'Oila_Ortacha', 'Oila_Std',
]

district_stats = df.groupby('Tuman').agg(aggregations).round(2)
district_stats.columns = flat_column_names

print("📊 TUMANLAR BO'YICHA ASOSIY STATISTIKALAR:")
print(district_stats)

# Districts with the highest and lowest mean income, mean age and mean
# household size, read from the district_stats table.
mean_income_col = district_stats['Daromad_Ortacha']
mean_age_col = district_stats['Yosh_Ortacha']
mean_family_col = district_stats['Oila_Ortacha']

print("\n🏆 ENG YUQORI KO'RSATKICHLAR:")
highest_income = mean_income_col.idxmax()
highest_age = mean_age_col.idxmax()
largest_family = mean_family_col.idxmax()

print(f"• Eng yuqori daromad: {highest_income} ({mean_income_col.loc[highest_income]:,.0f} so'm)")
print(f"• Eng katta yosh: {highest_age} ({mean_age_col.loc[highest_age]:.1f} yil)")
print(f"• Eng katta oila: {largest_family} ({mean_family_col.loc[largest_family]:.1f} kishi)")

print("\n📉 ENG PAST KO'RSATKICHLAR:")
lowest_income = mean_income_col.idxmin()
lowest_age = mean_age_col.idxmin()
smallest_family = mean_family_col.idxmin()

print(f"• Eng past daromad: {lowest_income} ({mean_income_col.loc[lowest_income]:,.0f} so'm)")
print(f"• Eng kichik yosh: {lowest_age} ({mean_age_col.loc[lowest_age]:.1f} yil)")
print(f"• Eng kichik oila: {smallest_family} ({mean_family_col.loc[smallest_family]:.1f} kishi)")

# Row-normalised cross-tabulations: within each district, the percentage of
# people in every education / employment / transport category.
district_col = df['Tuman']
education_by_district = pd.crosstab(district_col, df["Ta'lim"], normalize='index').mul(100)
employment_by_district = pd.crosstab(district_col, df['Ish_Holati'], normalize='index').mul(100)
transport_by_district = pd.crosstab(district_col, df['Transport'], normalize='index').mul(100)

# Education level
print("\n🎓 TA'LIM DARAJASI BO'YICHA TAHLIL:")
print("Har tumandagi ta'lim darajasi (%):")
print(education_by_district.round(1))

# District with the largest share of higher education
higher_edu_share = education_by_district['Oliy']
highest_edu_district = higher_edu_share.idxmax()
print(f"\nEng ko'p oliy ma'lumotli: {highest_edu_district} ({higher_edu_share.loc[highest_edu_district]:.1f}%)")

# Employment status
print("\n💼 ISH HOLATI BO'YICHA TAHLIL:")
print("Har tumandagi ish holati (%):")
print(employment_by_district.round(1))

# Transport usage
print("\n🚗 TRANSPORT FOYDALANISH:")
print("Har tumandagi transport turi (%):")
print(transport_by_district.round(1))

# TODO: conclusions about the differences between districts
# Placeholder prompts for the student to fill in.
print(f"\n🎯 SIZNING TAHLIL:")
print("1. Qaysi tuman eng rivojlangan va nima uchun?")
print("Javob: ...")

print("\n2. Tumanlar orasidagi asosiy farqlar?")
print("Javob: ...")

print("\n3. Iqtisodiy tengsizlik darajasi qanday?")
print("Javob: ...")

print("\n4. Qaysi tumanga ko'proq e'tibor berish kerak?")
print("Javob: ...")

# ANOVA test - is there a significant difference between districts?
print(f"\n📊 STATISTIK TESTLAR:")

# One-way ANOVA on income: one group of incomes per district.
# NOTE(review): income is generated from a log-normal distribution, which is
# skewed; if the normality assumption matters here, a test on log-income or
# stats.kruskal may be more appropriate - confirm with the course material.
district_groups = [df[df['Tuman'] == district]['Oylik_Daromad'] for district in df['Tuman'].unique()]
f_stat, p_value = stats.f_oneway(*district_groups)

# The verdict is built outside the f-string: a backslash inside an f-string
# expression ('yo\'q') is a SyntaxError on Python versions before 3.12.
if p_value < 0.05:
    anova_verdict = "Tumanlar orasida sezilarli farq bor"
else:
    anova_verdict = "Tumanlar orasida sezilarli farq yo'q"

print(f"Daromad uchun ANOVA testi:")
print(f"• F-statistik: {f_stat:.2f}")
print(f"• p-qiymat: {p_value:.4f}")
print(f"• Natija: {anova_verdict}")

# Comprehensive visualization: 3x2 grid of per-district charts.
fig, axes = plt.subplots(3, 2, figsize=(15, 18))

# 1. Age distribution by district (box plot)
df.boxplot(column='Yosh', by='Tuman', ax=axes[0,0])
axes[0,0].set_title('Tumanlar bo\'yicha Yosh Taqsimoti')
axes[0,0].set_xlabel('Tuman')
axes[0,0].set_ylabel('Yosh')

# 2. Income distribution by district (box plot)
df.boxplot(column='Oylik_Daromad', by='Tuman', ax=axes[0,1])
axes[0,1].set_title('Tumanlar bo\'yicha Daromad Taqsimoti')
axes[0,1].set_xlabel('Tuman')
axes[0,1].set_ylabel('Oylik Daromad')

# 3. Education level shares per district (stacked bars of the percentage table)
education_by_district.plot(kind='bar', stacked=True, ax=axes[1,0], colormap='viridis')
axes[1,0].set_title('Tumanlar bo\'yicha Ta\'lim Darajasi')
axes[1,0].set_ylabel('Foiz (%)')
axes[1,0].tick_params(axis='x', rotation=45)

# 4. Employment status shares per district (stacked bars)
employment_by_district.plot(kind='bar', stacked=True, ax=axes[1,1], colormap='Set2')
axes[1,1].set_title('Tumanlar bo\'yicha Ish Holati')
axes[1,1].set_ylabel('Foiz (%)')
axes[1,1].tick_params(axis='x', rotation=45)

# 5. Mean income per district, sorted ascending so the largest bar is on top
district_income_mean = df.groupby('Tuman')['Oylik_Daromad'].mean().sort_values(ascending=True)
district_income_mean.plot(kind='barh', ax=axes[2,0], color='lightblue')
axes[2,0].set_title('Tumanlar bo\'yicha O\'rtacha Daromad')
axes[2,0].set_xlabel('O\'rtacha Daromad (so\'m)')

# 6. Mean household size per district, sorted ascending
family_size_mean = df.groupby('Tuman')['Oila_A\'zolari'].mean().sort_values(ascending=True)
family_size_mean.plot(kind='barh', ax=axes[2,1], color='lightgreen')
axes[2,1].set_title('Tumanlar bo\'yicha O\'rtacha Oila Hajmi')
axes[2,1].set_xlabel('O\'rtacha Oila A\'zolari')

plt.tight_layout()
plt.show()

## 5️⃣ Vazifa: Confidence Intervals va Populyatsiya Parametrlari Bahosi

### Sample ma'lumotlari asosida populyatsiya parametrlarini baholang va confidence intervallarini hisoblang:

In [None]:
# TODO: Task 5 - confidence intervals and estimation of population parameters
print("📊 5-VAZIFA: CONFIDENCE INTERVALS va POPULYATSIYA PARAMETRLARI BAHOSI")
print("=" * 70)

# Draw a larger sample (300 people) with a fixed seed.
large_sample = df.sample(n=300, random_state=42)
large_sample_rows = len(large_sample)
print(f"Sample hajmi: {large_sample_rows} kishi")

# Two-sided critical z values (normal approximation), keyed by confidence level.
z_scores = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}
confidence_levels = sorted(z_scores)

# Confidence intervals for the mean age, based on large_sample.
print(f"\n📈 YOSH UCHUN CONFIDENCE INTERVALS:")

sample_age_mean = large_sample['Yosh'].mean()
sample_age_std = large_sample['Yosh'].std()
n = len(large_sample)
age_se = sample_age_std / np.sqrt(n)  # standard error of the mean, s / sqrt(n)

print(f"Sample o'rtacha yosh: {sample_age_mean:.2f} yil")
print(f"Sample standart og'ish: {sample_age_std:.2f} yil")
print(f"Standard Error: {age_se:.3f} yil")

# The true mean does not change between confidence levels; compute it once.
true_age_mean = df['Yosh'].mean()

for conf_level in confidence_levels:
    z = z_scores[conf_level]
    margin_error = z * age_se
    ci_lower = sample_age_mean - margin_error
    ci_upper = sample_age_mean + margin_error

    # Check whether the true parameter lies inside the interval.
    actual_in_ci = ci_lower <= true_age_mean <= ci_upper
    # Built outside the f-string: a backslash inside an f-string expression
    # ('Yo\'q') is a SyntaxError on Python versions before 3.12.
    in_ci_label = "Ha" if actual_in_ci else "Yo'q"

    print(f"\n{conf_level*100:.0f}% Confidence Interval:")
    print(f"  [{ci_lower:.2f}, {ci_upper:.2f}] yil")
    print(f"  Margin of Error: ±{margin_error:.2f} yil")
    print(f"  Haqiqiy parametr intervalda? {in_ci_label}")

# Confidence intervals for the mean income, based on large_sample.
# Uses n from the age section above (the size of large_sample).
print(f"\n💰 DAROMAD UCHUN CONFIDENCE INTERVALS:")

sample_income_mean = large_sample['Oylik_Daromad'].mean()
sample_income_std = large_sample['Oylik_Daromad'].std()
income_se = sample_income_std / np.sqrt(n)  # standard error of the mean

print(f"Sample o'rtacha daromad: {sample_income_mean:,.0f} so'm")
print(f"Sample standart og'ish: {sample_income_std:,.0f} so'm")
print(f"Standard Error: {income_se:,.0f} so'm")

# The true mean does not change between confidence levels; compute it once.
true_income_mean = df['Oylik_Daromad'].mean()

for conf_level in confidence_levels:
    z = z_scores[conf_level]
    margin_error = z * income_se
    ci_lower = sample_income_mean - margin_error
    ci_upper = sample_income_mean + margin_error

    actual_in_ci = ci_lower <= true_income_mean <= ci_upper
    # Built outside the f-string: a backslash inside an f-string expression
    # ('Yo\'q') is a SyntaxError on Python versions before 3.12.
    in_ci_label = "Ha" if actual_in_ci else "Yo'q"

    print(f"\n{conf_level*100:.0f}% Confidence Interval:")
    print(f"  [{ci_lower:,.0f}, {ci_upper:,.0f}] so'm")
    print(f"  Margin of Error: ±{margin_error:,.0f} so'm")
    print(f"  Haqiqiy parametr intervalda? {in_ci_label}")

# Confidence interval for a proportion (share of people with higher education)
print(f"\n🎓 OLIY MA'LUMOT FOIZI UCHUN CONFIDENCE INTERVALS:")

# Share of higher education in the sample and its standard error
# sqrt(p * (1 - p) / n) (normal approximation).
higher_edu_count = (large_sample['Ta\'lim'] == 'Oliy').sum()
sample_proportion = higher_edu_count / n
se_proportion = np.sqrt(sample_proportion * (1 - sample_proportion) / n)

print(f"Sample'dagi oliy ma'lumotlilar: {higher_edu_count}/{n} = {sample_proportion:.3f}")
print(f"Standard Error (proportion): {se_proportion:.3f}")

# True proportion in the full data; loop-invariant, so computed once.
actual_proportion = (df['Ta\'lim'] == 'Oliy').sum() / len(df)

for conf_level in confidence_levels:
    z = z_scores[conf_level]
    margin_error = z * se_proportion
    # A proportion cannot leave [0, 1], so the interval is clamped.
    ci_lower = max(0, sample_proportion - margin_error)
    ci_upper = min(1, sample_proportion + margin_error)

    actual_in_ci = ci_lower <= actual_proportion <= ci_upper
    # Built outside the f-string: a backslash inside an f-string expression
    # ('Yo\'q') is a SyntaxError on Python versions before 3.12.
    in_ci_label = "Ha" if actual_in_ci else "Yo'q"

    print(f"\n{conf_level*100:.0f}% Confidence Interval:")
    print(f"  [{ci_lower:.3f}, {ci_upper:.3f}] yoki [{ci_lower*100:.1f}%, {ci_upper*100:.1f}%]")
    print(f"  Margin of Error: ±{margin_error*100:.1f}%")
    print(f"  Haqiqiy proportsiya intervalda? {in_ci_label}")

# Required sample size for a given margin of error: n = (z * s / E)^2
print(f"\n📏 KERAKLI SAMPLE HAJMI HISOBLASH:")

desired_margin_errors = [1, 0.5, 0.2]  # in years
confidence_level = 0.95
z = z_scores[confidence_level]

print(f"Yosh uchun kerakli sample hajmlari ({confidence_level*100:.0f}% confidence):")
for margin in desired_margin_errors:
    # Round UP: rounding to the nearest integer can give a sample that is one
    # person too small to reach the requested margin.
    required_n = int(np.ceil(((z * sample_age_std) / margin) ** 2))
    print(f"  ±{margin} yil xatolik uchun: {required_n} kishi")

# TODO: conclusions about confidence intervals
# Placeholder prompts for the student to fill in.
print(f"\n💡 CONFIDENCE INTERVALS HAQIDA SIZNING XULOSALAR:")
print("1. Confidence level oshganda interval qanday o'zgaradi?")
print("Javob: ...")

print("\n2. Qaysi parametr uchun eng aniq baho oldingiz?")
print("Javob: ...")

print("\n3. 95% confidence interval nimani anglatadi?")
print("Javob: ...")

print("\n4. Sample hajmini oshirish qanday ta'sir qiladi?")
print("Javob: ...")

# Visualization: 2x2 grid about margins of error and the 95% interval.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 1. Margin of error for age at each confidence level (z * age_se)
conf_levels_pct = [90, 95, 99]
age_margins = []
for conf_level in confidence_levels:
    z = z_scores[conf_level]
    margin = z * age_se
    age_margins.append(margin)

axes[0,0].bar(conf_levels_pct, age_margins, color='skyblue')
axes[0,0].set_title('Yosh uchun Margin of Error')
axes[0,0].set_xlabel('Confidence Level (%)')
axes[0,0].set_ylabel('Margin of Error (yil)')

# 2. Margin of error for income at each confidence level (z * income_se)
income_margins = []
for conf_level in confidence_levels:
    z = z_scores[conf_level]
    margin = z * income_se
    income_margins.append(margin)

axes[0,1].bar(conf_levels_pct, income_margins, color='lightgreen')
axes[0,1].set_title('Daromad uchun Margin of Error')
axes[0,1].set_xlabel('Confidence Level (%)')
axes[0,1].set_ylabel('Margin of Error (so\'m)')

# 3. Sample mean with its 95% confidence interval (age); the dashed line is
# the true population mean. The marker is drawn twice: once by plot() and
# once by errorbar(fmt='o').
axes[1,0].plot([1], [sample_age_mean], 'o', markersize=10, color='blue')
z_95 = z_scores[0.95]
margin_95 = z_95 * age_se
axes[1,0].errorbar([1], [sample_age_mean], 
                   yerr=[[margin_95], [margin_95]], 
                   fmt='o', color='blue', capsize=10, capthick=2)
axes[1,0].axhline(df['Yosh'].mean(), color='red', linestyle='--', 
                  label=f'Haqiqiy μ = {df["Yosh"].mean():.1f}')
axes[1,0].set_xlim(0.5, 1.5)
axes[1,0].set_title('Yosh uchun 95% CI')
axes[1,0].set_ylabel('Yosh')
axes[1,0].legend()
axes[1,0].set_xticks([])

# 4. Sample size vs 95% margin of error. These are computed from the formula
# z * s / sqrt(size) using sample_age_std; no new samples are drawn.
sample_sizes_test = [50, 100, 200, 300, 500, 1000]
margins_by_size = []
for size in sample_sizes_test:
    se = sample_age_std / np.sqrt(size)
    margin = z_scores[0.95] * se
    margins_by_size.append(margin)

axes[1,1].plot(sample_sizes_test, margins_by_size, 'o-', color='purple')
axes[1,1].set_title('Sample Hajmi vs Margin of Error')
axes[1,1].set_xlabel('Sample Hajmi')
axes[1,1].set_ylabel('Margin of Error (yil)')
axes[1,1].grid(True)

plt.tight_layout()
plt.show()

# Side-by-side report of the true population means and the estimates obtained
# from large_sample, with their absolute differences.
true_age_mu = df['Yosh'].mean()
true_income_mu = df['Oylik_Daromad'].mean()
age_difference = abs(true_age_mu - sample_age_mean)
income_difference = abs(true_income_mu - sample_income_mean)

print("\n📊 HAQIQIY vs BAHOLANGAN PARAMETRLAR:")
print("Yosh:")
print(f"  • Haqiqiy μ: {true_age_mu:.2f} yil")
print(f"  • Sample x̄: {sample_age_mean:.2f} yil")
print(f"  • Farq: {age_difference:.2f} yil")

print("\nDaromad:")
print(f"  • Haqiqiy μ: {true_income_mu:,.0f} so'm")
print(f"  • Sample x̄: {sample_income_mean:,.0f} so'm")
print(f"  • Farq: {income_difference:,.0f} so'm")

## 📊 Loyiha Yakuniy Xulosalari

### Barcha tahlillar asosida umumiy xulosalar va tavsiyalar:

In [None]:
# TODO: final conclusions and recommendations
print("🎯 LOYIHA YAKUNIY XULOSALARI")
print("="*50)

print("📋 ASOSIY TOPILMALAR:")

# Computed outside the f-string: a backslash inside an f-string expression
# (df['Ta\'lim']) is a SyntaxError on Python versions before 3.12.
higher_edu_pct = (df["Ta'lim"] == 'Oliy').sum() / len(df) * 100

print(f"\n1️⃣ POPULYATSIYA XUSUSIYATLARI:")
print(f"   • Jami kuzatuvlar: {len(df):,} kishi")
print(f"   • O'rtacha yosh: {df['Yosh'].mean():.1f} yil")
print(f"   • O'rtacha daromad: {df['Oylik_Daromad'].mean():,.0f} so'm")
print(f"   • Oliy ma'lumotlilar: {higher_edu_pct:.1f}%")

print(f"\n2️⃣ SAMPLE USULLARI SAMARADORLIGI:")
# NOTE(review): the "most effective method" below is a fixed statement, not
# derived from the errors measured in Task 2 - confirm it matches the results.
print(f"   • Eng samarali: Qatlamli sample (tuman bo'yicha)")
print(f"   • Sababi: Har tuman proportsional ravishda aks etadi")
print(f"   • Tavsiya: Demografik tadqiqotlar uchun qatlamli sample")

print(f"\n3️⃣ SAMPLE HAJMI TAVSIYALARI:")
# Minimum n for a ±1 year margin at 95% confidence: n = (z * σ / E)^2, rounded
# up. The previous hard-coded 200 gives a margin of roughly ±2 years for an age
# standard deviation near the generating value of 15.
z_for_95 = 1.96
target_margin_years = 1
optimal_sample_size = int(np.ceil((z_for_95 * df['Yosh'].std() / target_margin_years) ** 2))
print(f"   • Minimal hajm: {optimal_sample_size} kishi")
print(f"   • Yosh uchun ±1 yil aniqlik")
print(f"   • 95% confidence level")

print(f"\n4️⃣ TUMANLAR O'RTASIDAGI FARQLAR:")
# Group once and reuse the per-district mean income.
mean_income_by_district = df.groupby('Tuman')['Oylik_Daromad'].mean()
richest_district = mean_income_by_district.idxmax()
poorest_district = mean_income_by_district.idxmin()
richest_income = mean_income_by_district.max()
poorest_income = mean_income_by_district.min()

print(f"   • Eng yuqori daromad: {richest_district} ({richest_income:,.0f} so'm)")
print(f"   • Eng past daromad: {poorest_district} ({poorest_income:,.0f} so'm)")
print(f"   • Farq: {richest_income - poorest_income:,.0f} so'm ({((richest_income/poorest_income - 1)*100):.1f}%)")

# TODO: your final conclusions
# Placeholder prompts for the student to fill in.
print(f"\n💡 SIZNING YAKUNIY XULOSALARINGIZ:")
print("1. Bu tadqiqot populyatsiyani qanchalik yaxshi aks ettiradi?")
print("Javob: ...")

print("\n2. Qaysi sample usuli eng maqbul va nima uchun?")
print("Javob: ...")

print("\n3. Toshkent shahri aholisi uchun 3 ta muhim xulosa:")
print("a) ...")
print("b) ...")
print("c) ...")

print("\n4. Keyingi tadqiqotlar uchun tavsiyalar:")
print("a) ...")
print("b) ...")
print("c) ...")

print("\n5. Bu tadqiqotning cheklovlari:")
print("a) ...")
print("b) ...")
print("c) ...")

# Final dashboard: 2x3 grid summarising the whole data set.
# Panel 6 relies on z_scores and sample_age_std from the Task 5 cell.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# 1. Population pyramid by age group: men are plotted as negative values so
# their bars extend to the left, women to the right.
age_groups = pd.cut(df['Yosh'], bins=[15, 25, 35, 45, 55, 80], 
                   labels=['16-25', '26-35', '36-45', '46-55', '56+'])
age_gender = pd.crosstab(age_groups, df['Jins'])

men_data = age_gender['Erkak'].values
women_data = age_gender['Ayol'].values
# NOTE(review): y_pos has one slot per category; this assumes the crosstab
# has a row for every age group - confirm no group can be empty.
y_pos = np.arange(len(age_groups.cat.categories))

axes[0,0].barh(y_pos, -men_data, color='lightblue', label='Erkak')
axes[0,0].barh(y_pos, women_data, color='pink', label='Ayol')
axes[0,0].set_yticks(y_pos)
axes[0,0].set_yticklabels(age_groups.cat.categories)
axes[0,0].set_title('Aholi Piramidasi')
axes[0,0].set_xlabel('Aholi soni')
axes[0,0].legend()

# 2. Income distribution with mean (red) and median (blue) markers
axes[0,1].hist(df['Oylik_Daromad'], bins=30, alpha=0.7, color='green', edgecolor='black')
axes[0,1].axvline(df['Oylik_Daromad'].mean(), color='red', linestyle='--', label='O\'rtacha')
axes[0,1].axvline(df['Oylik_Daromad'].median(), color='blue', linestyle='--', label='Mediana')
axes[0,1].set_title('Daromad Taqsimoti')
axes[0,1].set_xlabel('Oylik Daromad (so\'m)')
axes[0,1].set_ylabel('Chastota')
axes[0,1].legend()

# 3. Mean income per education level, highest first
edu_income = df.groupby('Ta\'lim')['Oylik_Daromad'].mean().sort_values(ascending=False)
edu_income.plot(kind='bar', ax=axes[0,2], color='orange')
axes[0,2].set_title('Ta\'lim Darajasi va O\'rtacha Daromad')
axes[0,2].set_ylabel('O\'rtacha Daromad (so\'m)')
axes[0,2].tick_params(axis='x', rotation=45)

# 4. Mean income per district, sorted ascending
district_income = df.groupby('Tuman')['Oylik_Daromad'].mean().sort_values(ascending=True)
district_income.plot(kind='barh', ax=axes[1,0], color='lightcoral')
axes[1,0].set_title('Tumanlar bo\'yicha O\'rtacha Daromad')
axes[1,0].set_xlabel('O\'rtacha Daromad (so\'m)')

# 5. Mean income per transport type
transport_income = df.groupby('Transport')['Oylik_Daromad'].mean()
transport_income.plot(kind='bar', ax=axes[1,1], color='purple')
axes[1,1].set_title('Transport Turi va O\'rtacha Daromad')
axes[1,1].set_ylabel('O\'rtacha Daromad (so\'m)')
axes[1,1].tick_params(axis='x', rotation=45)

# 6. Sample size vs 95% margin of error, from the formula z * s / sqrt(size)
sample_sizes_vis = [50, 100, 200, 300, 500]
margins_vis = [(z_scores[0.95] * sample_age_std / np.sqrt(size)) for size in sample_sizes_vis]
axes[1,2].plot(sample_sizes_vis, margins_vis, 'o-', color='navy', linewidth=2)
axes[1,2].set_title('Sample Hajmi va Aniqlik')
axes[1,2].set_xlabel('Sample Hajmi')
axes[1,2].set_ylabel('Margin of Error (yil)')
axes[1,2].grid(True)

plt.tight_layout()
plt.show()

# Closing status messages
print(f"\n📝 LOYIHA TUGALLANDI!")
print("✅ Barcha tahlillar bajarildi")
print("✅ Statistik xulosalar chiqarildi")
print("✅ Taqdimot uchun materiallar tayyorlandi")