<a href="https://colab.research.google.com/github/amirmohammadkalateh/analyze/blob/main/2_FS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from sklearn.preprocessing import StandardScaler

# Load the housing-market CSV from the current working directory.
df = pd.read_csv('global_housing_market_extended.csv')

# 1. Basic Statistical Analysis
print("\n1. Basic Statistical Analysis:")

# Each header is printed together with its table; sep="\n" keeps them on
# separate lines exactly as two consecutive print() calls would.
print("\nDescriptive Statistics:", df.describe(), sep="\n")

# Skewness and kurtosis are only defined for numeric columns, so select them once.
numeric_columns = df.select_dtypes(include=[np.number])

print("\nSkewness:", numeric_columns.skew(), sep="\n")

print("\nKurtosis:", numeric_columns.kurtosis(), sep="\n")

# 2. Time Series Analysis
# Mean House Price Index across all rows that share the same year.
print("\n2. Time Series Analysis:")
avg_prices_by_year = df.groupby('Year')['House Price Index'].mean()
print("\nAverage House Price Index by Year:", avg_prices_by_year, sep="\n")

# 3. Correlation Analysis
# Pairwise correlation between the numeric columns only.
numeric_only = df.select_dtypes(include=[np.number])
correlation_matrix = numeric_only.corr()
print("\n3. Correlation Matrix:", correlation_matrix, sep="\n")

# 4. Group Analysis by Country
# Per-country summary: spread of the price index plus mean affordability and
# GDP growth, rounded to two decimals for display.
print("\n4. Country-wise Analysis:")
country_aggregations = {
    'House Price Index': ['mean', 'std', 'min', 'max'],
    'Affordability Ratio': 'mean',
    'GDP Growth (%)': 'mean',
}
country_stats = df.groupby('Country').agg(country_aggregations).round(2)
print(country_stats)

# Visualizations

# 1. Time Series Plot
# One line per country showing House Price Index against Year, saved to
# 'time_series.png'. Only the first five countries (in order of first
# appearance in the data) are drawn to keep the chart readable; they are NOT
# ranked by price, so the title says "First 5" rather than "Top 5".
plt.figure(figsize=(15, 8))
for country in df['Country'].unique()[:5]:
    # Sort chronologically so the line is drawn left-to-right even when the
    # CSV rows for a country are not stored in year order.
    country_data = df[df['Country'] == country].sort_values('Year')
    plt.plot(country_data['Year'], country_data['House Price Index'], label=country)
plt.title('House Price Index Trends (First 5 Countries)')
plt.xlabel('Year')
plt.ylabel('House Price Index')
plt.legend()
plt.tight_layout()
plt.savefig('time_series.png')
plt.close()  # release the figure; the image on disk is the output

# 2. Correlation Heatmap
# Annotated heatmap of correlation_matrix with a diverging palette centred on
# zero, written to 'correlation_heatmap.png'.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap')
heatmap_fig.tight_layout()
heatmap_fig.savefig('correlation_heatmap.png')
plt.close(heatmap_fig)

# 3. Box Plot
# Per-country distribution of the House Price Index, written to 'boxplot.png'.
box_fig, box_ax = plt.subplots(figsize=(15, 6))
sns.boxplot(x='Country', y='House Price Index', data=df, ax=box_ax)
# Tilt the country tick labels by 45 degrees.
plt.setp(box_ax.get_xticklabels(), rotation=45)
box_ax.set_title('House Price Index Distribution by Country')
box_fig.tight_layout()
box_fig.savefig('boxplot.png')
plt.close(box_fig)

# 4. Distribution Plots
# 2x2 grid of histograms (with KDE overlay), one panel per indicator, written
# to 'distributions.png'.
distribution_columns = [
    'House Price Index',
    'Affordability Ratio',
    'GDP Growth (%)',
    'Inflation Rate (%)',
]
dist_fig, dist_axes = plt.subplots(2, 2, figsize=(15, 10))
# .flat walks the grid row by row, matching subplot positions 1..4.
for panel_ax, column in zip(dist_axes.flat, distribution_columns):
    sns.histplot(data=df, x=column, kde=True, ax=panel_ax)
    panel_ax.set_title(f'Distribution of {column}')
dist_fig.tight_layout()
dist_fig.savefig('distributions.png')
plt.close(dist_fig)

# 5. 3D Scatter Plot
# Interactive plotly scatter of price index vs. affordability vs. GDP growth,
# coloured by country and exported as a standalone HTML file.
scatter_axes = {
    'x': 'House Price Index',
    'y': 'Affordability Ratio',
    'z': 'GDP Growth (%)',
}
fig = px.scatter_3d(
    df,
    color='Country',
    title='3D Relationship between House Prices, Affordability, and GDP Growth',
    **scatter_axes,
)
fig.write_html('3d_scatter.html')

# 6. Pair Plot
# Scatter-matrix of the four main indicators, written to 'pairplot.png'.
pairplot_columns = [
    'House Price Index',
    'Affordability Ratio',
    'GDP Growth (%)',
    'Inflation Rate (%)',
]
sns.pairplot(df[pairplot_columns])
# Saved and closed through pyplot, which acts on the current figure.
plt.savefig('pairplot.png')
plt.close()

# Statistical Tests
print("\n5. Statistical Tests:")

# Normality Tests
# Run scipy's normaltest on each indicator and report only the p-value.
normality_columns = [
    'House Price Index',
    'Affordability Ratio',
    'GDP Growth (%)',
    'Inflation Rate (%)',
]
for column in normality_columns:
    outcome = stats.normaltest(df[column])
    stat, p_value = outcome.statistic, outcome.pvalue
    print(f"\nNormality test for {column}:", f"p-value: {p_value}", sep="\n")

# ANOVA Test
# One-way ANOVA on the House Price Index, with one sample per country.
price_samples = [
    prices.values
    for _, prices in df.groupby('Country')['House Price Index']
]
f_stat, p_value = stats.f_oneway(*price_samples)
print("\nANOVA test for House Price Index between countries:")
print(f"p-value: {p_value}")

print("\nAnalysis completed! Check the generated visualization files.")



1. Basic Statistical Analysis:

Descriptive Statistics:
              Year  House Price Index  Rent Index  Affordability Ratio  \
count   200.000000         200.000000  200.000000           200.000000   
mean   2019.500000         130.381022   83.048370             7.237768   
std       2.879489          28.752229   21.439858             2.576085   
min    2015.000000          80.552212   50.354311             3.041688   
25%    2017.000000         104.142562   60.466671             5.034207   
50%    2019.500000         129.193653   83.721711             7.375697   
75%    2022.000000         157.127098  100.604665             9.276196   
max    2024.000000         179.971767  119.855388            11.879671   

       Mortgage Rate (%)  Inflation Rate (%)  GDP Growth (%)  \
count         200.000000          200.000000      200.000000   
mean            4.150621            3.649756        2.133722   
std             1.380222            1.881938        2.413270   
min             1.53

In [2]:
# 7. Bar Chart
# Mean House Price Index per country, highest first, written to 'bar_chart.png'.
avg_prices = (
    df.groupby('Country')['House Price Index']
    .mean()
    .sort_values(ascending=False)
)
bar_fig, bar_ax = plt.subplots(figsize=(15, 6))
sns.barplot(x=avg_prices.index, y=avg_prices.values, ax=bar_ax)
bar_ax.set_title('Average House Price Index by Country')
# Tilt the country tick labels by 45 degrees.
plt.setp(bar_ax.get_xticklabels(), rotation=45)
bar_fig.tight_layout()
bar_fig.savefig('bar_chart.png')
plt.close(bar_fig)

# 8. Pie Chart
# Pie of the mean 'Urbanization Rate (%)' per country, written to 'pie_chart.png'.
# NOTE(review): a pie normalises its inputs, so each slice (and its autopct
# label) is that country's share of the SUM of the averages, not the
# urbanization rate itself -- confirm this is the intended reading.
urbanization_avg = df.groupby('Country')['Urbanization Rate (%)'].mean()
pie_fig, pie_ax = plt.subplots(figsize=(10, 10))
pie_ax.pie(
    urbanization_avg,
    labels=urbanization_avg.index,
    autopct='%1.1f%%',
)
pie_ax.set_title('Distribution of Urbanization Rates by Country')
pie_ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
pie_fig.savefig('pie_chart.png')
plt.close(pie_fig)