In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Part-A: Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [15]:
df = pd.read_csv('Global_Tech_Gadget_Consumption.csv')
df.head()

Unnamed: 0,Country,Year,Smartphone Sales (Millions),Laptop Shipments (Millions),Gaming Console Adoption (%),Smartwatch Penetration (%),Average Consumer Spending on Gadgets ($),E-Waste Generated (Metric Tons),5G Penetration Rate (%)
0,USA,2015,111.37,123.63,12.05,14.49,311.21,939.89,54.64
1,USA,2016,224.65,65.27,12.92,9.88,250.46,1361.42,53.61
2,USA,2017,102.12,26.75,19.63,17.35,2404.22,872.52,29.94
3,USA,2018,148.1,129.28,26.44,12.45,1883.37,1241.41,75.88
4,USA,2019,83.93,97.81,20.84,6.18,777.42,1939.39,76.26


In [16]:
print("Dataset size:", df.shape)

Dataset size: (110, 9)


Part-B: Simple Random Sampling

In [21]:
sample_size = 50
srs = df.sample(n=sample_size, random_state=42)
print(srs.head())
print("Population mean:", df['Smartphone Sales (Millions)'].mean())
print("Sample mean:", srs['Smartphone Sales (Millions)'].mean())

        Country  Year  Smartphone Sales (Millions)  \
78  South Korea  2016                       111.02   
10          USA  2025                       367.74   
4           USA  2019                        83.93   
84  South Korea  2022                       444.60   
64       France  2024                       234.44   

    Laptop Shipments (Millions)  Gaming Console Adoption (%)  \
78                        21.29                        16.48   
10                        78.09                        35.38   
4                         97.81                        20.84   
84                        80.76                        35.63   
64                       140.20                        16.26   

    Smartwatch Penetration (%)  Average Consumer Spending on Gadgets ($)  \
78                        6.36                                   1872.46   
10                       19.68                                   2611.60   
4                         6.18                                

Part-C: Systematic Sampling

In [22]:
n = 50
k = len(df) // n
start = np.random.randint(0, k)
sys_sample = df.iloc[start::k][:n]
sys_sample.head()

Unnamed: 0,Country,Year,Smartphone Sales (Millions),Laptop Shipments (Millions),Gaming Console Adoption (%),Smartwatch Penetration (%),Average Consumer Spending on Gadgets ($),E-Waste Generated (Metric Tons),5G Penetration Rate (%)
1,USA,2016,224.65,65.27,12.92,9.88,250.46,1361.42,53.61
3,USA,2018,148.1,129.28,26.44,12.45,1883.37,1241.41,75.88
5,USA,2020,300.49,100.59,28.16,17.68,593.73,136.97,58.45
7,USA,2022,239.21,63.46,7.04,14.72,339.59,175.0,65.69
9,USA,2024,444.57,58.6,7.67,15.42,1107.74,1902.16,55.95


Part-D: Stratified Sampling

In [24]:
strata_col = "Gaming Console Adoption (%)"  # your column
sample_size = 50

# proportional fraction for each group
frac = sample_size / len(df)

# stratified sample
stratified_sample = df.groupby(strata_col, group_keys=False).sample(frac=frac, random_state=42)

stratified_sample.head()

Unnamed: 0,Country,Year,Smartphone Sales (Millions),Laptop Shipments (Millions),Gaming Console Adoption (%),Smartwatch Penetration (%),Average Consumer Spending on Gadgets ($),E-Waste Generated (Metric Tons),5G Penetration Rate (%)
78,South Korea,2016,111.02,21.29,16.48,6.36,1872.46,111.47,62.47
77,South Korea,2015,241.26,116.14,19.64,21.05,470.41,147.73,21.34
73,Japan,2022,238.54,72.65,28.46,8.33,458.93,1118.76,58.11
84,South Korea,2022,444.6,80.76,35.63,2.64,2891.9,142.36,55.94


Part-E: Cluster Sampling

In [25]:
df['cluster_id'] = df.index // (len(df)//10)  # 10 clusters
selected_clusters = np.random.choice(df['cluster_id'].unique(), size=2, replace=False)
cluster_sample = df[df['cluster_id'].isin(selected_clusters)]
print("Selected clusters:", selected_clusters)
cluster_sample.head()

Selected clusters: [1 9]


Unnamed: 0,Country,Year,Smartphone Sales (Millions),Laptop Shipments (Millions),Gaming Console Adoption (%),Smartwatch Penetration (%),Average Consumer Spending on Gadgets ($),E-Waste Generated (Metric Tons),5G Penetration Rate (%),cluster_id
11,China,2015,77.62,87.58,18.77,10.89,2018.37,1187.13,28.59,1
12,China,2016,93.38,137.35,21.03,23.74,1655.0,563.9,26.13,1
13,China,2017,499.89,99.05,22.01,5.32,1465.64,1037.5,12.43,1
14,China,2018,478.75,45.72,35.63,2.04,2653.45,1325.48,58.19,1
15,China,2019,254.1,130.87,16.48,19.44,1491.33,1825.14,13.12,1
