In [None]:
import pandas as pd

In [None]:
# Remote CSV that holds the Boston housing data set
boston_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ST0151EN-SkillsNetwork/labs/boston_housing.csv'

# Download and parse the CSV into the DataFrame that the cells below analyse
boston_df = pd.read_csv(filepath_or_buffer=boston_url)

In [None]:
import matplotlib.pyplot as plt
# Boxplot of MEDV (median value of owner-occupied homes), drawn on an
# explicitly created 8x6 figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
ax.boxplot(boston_df['MEDV'])
ax.set_title("Boxplot of Median Value of Owner-Occupied Homes")
ax.set_xlabel("Owner-Occupied Homes")
ax.set_ylabel("Median Value")
plt.show()

In [None]:
# Tally how many rows carry each distinct CHAS (Charles river) value
charles_counts = boston_df['CHAS'].value_counts()

# Render the tallies as a bar chart on an explicit 8x6 axes
fig, ax = plt.subplots(figsize=(8, 6))
charles_counts.plot.bar(ax=ax)
ax.set_title("Bar Plot of Charles River Variable")
ax.set_xlabel("Charles River")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Discretize the "AGE" variable into three groups: 35 and younger, between 35 and 70, and 70 and older.
# age_bins lists the cut points; the two interior values (35 and 70) are the group boundaries.
age_bins = [0, 35, 70, float('inf')]
age_labels = ['35 and Younger', 'Between 35 and 70', '70 and Older']

# Explicit masks are used instead of pd.cut because the labels need mixed
# boundary handling: 35 belongs to the youngest group (<= 35) while 70 belongs
# to the oldest group (>= 70). pd.cut closes every bin on the same side, so it
# cannot express both at once (with right=True, AGE == 70 lands in the middle
# group and AGE == 0 becomes NaN).
age = boston_df['AGE']
young_cut, old_cut = age_bins[1], age_bins[2]
age_group = pd.Series(pd.NA, index=boston_df.index, dtype='object')
age_group[age <= young_cut] = age_labels[0]
age_group[(age > young_cut) & (age < old_cut)] = age_labels[1]
age_group[age >= old_cut] = age_labels[2]
# Rows with a missing AGE match no mask and stay missing.
# An ordered categorical keeps the groups in label order on the plot axis.
boston_df['AGE_Group'] = pd.Categorical(age_group, categories=age_labels, ordered=True)

# Create a boxplot for the MEDV (Median value of owner-occupied homes) variable by AGE group.
# The axes is passed explicitly: DataFrame.boxplot(by=...) otherwise opens its
# own figure, which leaves a preceding plt.figure() empty and ignores its size.
fig, ax = plt.subplots(figsize=(10, 6))
boston_df.boxplot(column='MEDV', by='AGE_Group', ax=ax)
ax.set_title("Boxplot of MEDV vs AGE")
ax.set_xlabel("Age Group")
ax.set_ylabel("Median Value")
fig.suptitle("")  # Remove the figure-level title that pandas adds for grouped boxplots
plt.show()

In [None]:
# Scatter plot with NOX on the x-axis and INDUS on the y-axis
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x=boston_df['NOX'], y=boston_df['INDUS'])
ax.set_title("Scatter Plot: NOX vs INDUS")
ax.set_xlabel("Nitric Oxide Concentrations")
ax.set_ylabel("Proportion of Non-Retail Business Acres")
plt.show()

In [None]:
# Histogram of PTRATIO (pupil-to-teacher ratio) using 10 equal-width bins
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(boston_df['PTRATIO'], bins=10)
ax.set_title("Histogram: Pupil-to-Teacher Ratio")
ax.set_xlabel("Pupil-to-Teacher Ratio")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
from scipy.stats import ttest_ind

In [None]:
# Split MEDV into two samples by the CHAS flag: rows with CHAS == 1
# (bounded by the Charles river) and rows with CHAS == 0 (not bounded).
charles_river_medv = boston_df.loc[boston_df['CHAS'] == 1, 'MEDV']
non_charles_river_medv = boston_df.loc[boston_df['CHAS'] == 0, 'MEDV']

# Independent two-sample t-test on the two MEDV samples
# (ttest_ind is called with its default settings)
t_stat, p_value = ttest_ind(charles_river_medv, non_charles_river_medv)

# Decide at significance level alpha; any p-value that is not below alpha
# falls through to "fail to reject"
alpha = 0.05
reject_msg = "Reject the null hypothesis. There is a significant difference in the median value of houses bounded by the Charles river and those not bounded by the Charles river."
fail_msg = "Fail to reject the null hypothesis. There is no significant difference in the median value of houses bounded by the Charles river and those not bounded by the Charles river."
conclusion = reject_msg if p_value < alpha else fail_msg

print("T-statistic:", t_stat)
print("P-value:", p_value)
print("Conclusion:", conclusion)

In [None]:
from scipy.stats import f_oneway

# One-way ANOVA on MEDV across the AGE groups: build one MEDV sample per
# label in age_labels (relies on the AGE_Group column and age_labels list
# created in an earlier cell), then pass all samples to f_oneway.
age_groups = [
    boston_df.loc[boston_df['AGE_Group'] == label, 'MEDV']
    for label in age_labels
]
f_stat, p_value = f_oneway(*age_groups)

# Decide at significance level alpha
alpha = 0.05
reject_msg = "Reject the null hypothesis. There is a difference in median values of houses for each proportion of owner occupied units built prior to 1940 (AGE)."
fail_msg = "Fail to reject the null hypothesis. There is no difference in median values of houses for each proportion of owner occupied units built prior to 1940 (AGE)."
conclusion = reject_msg if p_value < alpha else fail_msg

print("F-statistic:", f_stat)
print("P-value:", p_value)
print("Conclusion:", conclusion)

In [None]:
from scipy.stats import pearsonr

# Pearson correlation test between NOX (nitric oxide concentrations) and
# INDUS (proportion of non-retail business acres per town).
# pearsonr returns the correlation coefficient and the p-value of the test.
corr, p_value = pearsonr(boston_df['NOX'], boston_df['INDUS'])

# Compare the p-value with the significance level (α); a p-value that is not
# below alpha falls through to "fail to reject".
alpha = 0.05
if p_value < alpha:
    conclusion = "Reject the null hypothesis. There is a relationship between Nitric oxide concentrations and proportion of non-retail business acres per town."
else:
    conclusion = "Fail to reject the null hypothesis. There is no relationship between Nitric oxide concentrations and proportion of non-retail business acres per town."

print("Pearson correlation coefficient:", corr)
print("P-value:", p_value)
print("Conclusion:", conclusion)

In [None]:
import statsmodels.api as sm

# Simple linear regression of MEDV on DIS.
# add_constant prepends an intercept column so the fit is not forced
# through the origin.
X = sm.add_constant(boston_df['DIS'])

# Build the ordinary-least-squares model (response first, design matrix second) and fit it
model = sm.OLS(endog=boston_df['MEDV'], exog=X)
results = model.fit()

# Pull the slope estimate for DIS and its p-value out of the fitted results
coefficient, p_value = results.params['DIS'], results.pvalues['DIS']

# Decide at significance level alpha
alpha = 0.05
reject_msg = "Reject the null hypothesis. The additional weighted distance to the five Boston employment centres has an impact on the median value of owner-occupied homes."
fail_msg = "Fail to reject the null hypothesis. The additional weighted distance to the five Boston employment centres has no impact on the median value of owner-occupied homes."
conclusion = reject_msg if p_value < alpha else fail_msg

print("Coefficient:", coefficient)
print("P-value:", p_value)
print("Conclusion:", conclusion)