In [1]:
# Paired t-test: A company has launched some offer
# H0: Pre and post usage is the same (i.e., offer is not effective)

In [2]:
# Import necessary libraries
import pandas as pd  # For handling datasets
from scipy.stats import ttest_rel  # For performing the paired t-test
from scipy.stats import t  # For computing critical values

# Load the dataset (Ensure the file path is correct)
# data = pd.read_csv("path_to_your_dataset.csv")  # Uncomment and modify when using an actual dataset

# Hypothesis:
# H0: Pre and post usage is the same (offer is not effective)
# H1: There is a difference in pre and post usage (offer is effective)

# Preview the dataset structure
# print(data.head())  # Uncomment this after loading the dataset

In [3]:
# Load the dataset from the specified path
data = pd.read_csv("C:/Users/dbda.STUDENTSDC/Music/LabPractice/Notebooks/Datasets/cust_seg.csv")

# Extract relevant columns for the paired t-test: pre-usage and post-usage after 2 months
usage_data = data[["pre_usage", "post_usage_2ndmonth"]]

# Display the first few rows of the dataset to understand its structure
print(data.head())  # Ensure correct loading and verify column names

   custid  sex  AqChannel  region  Marital_status  segment  pre_usage  \
0      70    0          4       1               1        1         57   
1     121    1          4       2               1        3         68   
2      86    0          4       3               1        1         44   
3     141    0          4       3               1        3         63   
4     172    0          4       2               1        2         47   

   Post_usage_1month  Latest_mon_usage  post_usage_2ndmonth  
0                 52              49.2                 57.2  
1                 59              63.6                 64.9  
2                 33              64.8                 36.3  
3                 44              56.4                 48.4  
4                 52              68.4                 57.2  


In [4]:
# Perform a paired t-test to compare pre-usage and post-usage after 2 months
# H0: Pre-usage and post-usage are the same (the offer is not effective)
# H1: There is a significant difference (the offer is effective)

tstats, pval = ttest_rel(usage_data["pre_usage"], usage_data["post_usage_2ndmonth"])

# Print the results
print("T-statistic:", tstats)  # Measures the difference between paired samples
print("P-value:", pval)  # Determines statistical significance

T-statistic: -8.866832246938742
P-value: 4.295733828012836e-16


In [5]:
# Calculate the degrees of freedom for the paired t-test
# Degrees of freedom (df) = Number of pairs - 1
degrees = len(usage_data["pre_usage"]) - 1  

# Print the computed degrees of freedom for verification
print("Degrees of Freedom:", degrees)

Degrees of Freedom: 199


In [6]:
# Define the significance level for hypothesis testing
alpha = 0.025  # Threshold for statistical significance

# Calculate the critical value for the t-distribution
# This helps determine the rejection region for hypothesis testing
critical_value = t.ppf(1 - alpha, degrees)

# Print the significance level and critical value for verification
print("Significance Level (Alpha):", alpha)
print("Critical Value:", critical_value)

Significance Level (Alpha): 0.025
Critical Value: 1.971956544249395


In [7]:
# Print the results of the paired t-test in a structured format
print(f"T-statistic: {tstats:.2f}, p-value: {pval:.4f}")  
# Displays the test statistic and p-value with appropriate precision

print(f"Critical value (alpha = {alpha}, two-tailed): +- {critical_value:.2f}")  
# Shows the critical value based on the defined significance level

T-statistic: -8.87, p-value: 0.0000
Critical value (alpha = 0.025, two-tailed): +- 1.97


In [8]:
# Compare the p-value with the significance level (alpha) to determine the test result
if pval < alpha:
    print("Reject null hypothesis")  # Significant difference in pre- and post-usage → Offer is effective
else:
    print("Fail to reject null hypothesis")  # No significant difference → Offer is not effective

Reject null hypothesis
