<a href="https://colab.research.google.com/github/jannatul615/world_population./blob/main/python_sample_(6).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sampling Assignment
Implementing Probability Sampling Methods in Python

## Instructions
Upload your dataset (minimum 200 rows), then complete all parts A–F.


In [2]:
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv('world_population.csv')
df.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


## Part A — Setup
- Report dataset size (rows, columns)

In [3]:
print("Dataset size:", df.shape)

Dataset size: (234, 17)


## Part B — Simple Random Sampling

In [5]:
sample_size = 50
srs = df.sample(n=sample_size, random_state=42)
print(srs.head())
print("Population mean:", df['Growth Rate'].mean())
print("Sample mean:", srs['Growth Rate'].mean())

     Rank CCA3 Country/Territory         Capital      Continent  \
69    184  GUF     French Guiana         Cayenne  South America   
206    20  THA          Thailand         Bangkok           Asia   
180   218  SMR        San Marino      San Marino         Europe   
9     140  ARM           Armenia         Yerevan           Asia   
127   180  MTQ        Martinique  Fort-de-France  North America   

     2022 Population  2020 Population  2015 Population  2010 Population  \
69            304557           290969           257026           228453   
206         71697030         71475664         70294397         68270489   
180            33660            34007            33570            31608   
9            2780469          2805608          2878595          2946293   
127           367507           370391           383515           392181   

     2000 Population  1990 Population  1980 Population  1970 Population  \
69            164351           113931            66825            46484

## Part C — Systematic Sampling

In [6]:
n = 50
k = len(df) // n
start = np.random.randint(0, k)
sys_sample = df.iloc[start::k][:n]
sys_sample.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
7,201,ATG,Antigua and Barbuda,Saint John’s,North America,93763,92664,89941,85695,75055,63328,64888,64516,442,212.1335,1.0058,0.0
11,55,AUS,Australia,Canberra,Oceania,26177413,25670051,23820236,22019168,19017963,17048003,14706322,12595034,7692024,3.4032,1.0099,0.33
15,154,BHR,Bahrain,Manama,Asia,1472233,1477469,1362142,1213645,711442,517418,362595,222555,765,1924.4876,1.0061,0.02
19,81,BEL,Belgium,Brussels,Europe,11655930,11561717,11248303,10877947,10264343,9959560,9828986,9629376,30528,381.8111,1.0038,0.15


## Part D — Stratified Sampling

In [12]:
strata_col = "country"  # your column
sample_size = 50

# proportional fraction for each group
frac = sample_size / len(df)

# stratified sample
stratified_sample = df.groupby(strata_col, group_keys=False).sample(frac=frac, random_state=42)

stratified_sample.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage,country
15,154,BHR,Bahrain,Manama,Asia,1472233,1477469,1362142,1213645,711442,517418,362595,222555,765,1924.4876,1.0061,0.02,0
9,140,ARM,Armenia,Yerevan,Asia,2780469,2805608,2878595,2946293,3168523,3556539,3135123,2534377,29743,93.4831,0.9962,0.03,0
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52,0
8,33,ARG,Argentina,Buenos Aires,South America,45510318,45036032,43257065,41100123,37070774,32637657,28024803,23842803,2780400,16.3683,1.0052,0.57,0
17,186,BRB,Barbados,Bridgetown,North America,281635,280693,278083,274711,264657,258868,253575,241397,430,654.9651,1.0015,0.0,0


## Part E — Cluster Sampling

In [24]:
df['continent'] = df.index // (len(df)//10)  # 10 clusters
selected_clusters = np.random.choice(df['continent'].unique(), size=2, replace=False)
cluster_sample = df[df['continent'].isin(selected_clusters)]
print("Selected clusters:", selected_clusters)
cluster_sample.head()

Selected clusters: [1 0]


Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage,country,capital,rank,continent,growth rate
0,0,0,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,...,10752971,652230,63.0587,0,0.52,0,0,0,0,0
1,0,0,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,...,2324731,28748,98.8702,0,0.04,0,0,0,0,0
2,0,0,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,...,13795915,2381741,18.8531,0,0.56,0,0,0,0,0
3,0,0,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,...,27075,199,222.4774,0,0.0,0,0,0,0,0
4,0,0,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,...,19860,468,170.5641,0,0.0,0,0,0,0,0


## Part F — Comparison & Reflection
Compare sample means vs population mean, then write your reflection.