# Exploritary Data Analysis

In [72]:
import pandas as pd
train = pd.read_csv('train_house_prices.csv')
test = pd.read_csv('test_house_prices.csv')

## 1. Basics

* Number of rows
* Number of columns

In [73]:
# shape(rows, columns)
print(f'Shape of train data = {train.shape}')
print(f'Shape of test data = {test.shape}')

Shape of train data = (1460, 81)
Shape of test data = (1459, 80)


* Make a table of the data types, amount of null values (Checking Missing Values) and description of all the features

In [115]:
data_types = train.dtypes
null_values = train.isnull().sum()
feature_names = train.columns
df_dt_null = pd.DataFrame({'Features':feature_names,'Data Types': data_types, 'Null Values': null_values})
df_dt_null = df_dt_null.sort_values(by='Null Values', ascending=False)
df_dt_null_markdown =df_dt_null.copy()
df_dt_null_markdown.drop('Features', inplace=True, axis=1)

In [116]:
print(df_dt_null_markdown.to_markdown())

|               | Data Types   |   Null Values |
|:--------------|:-------------|--------------:|
| PoolQC        | object       |          1453 |
| MiscFeature   | object       |          1406 |
| Alley         | object       |          1369 |
| Fence         | object       |          1179 |
| FireplaceQu   | object       |           690 |
| LotFrontage   | float64      |           259 |
| GarageYrBlt   | float64      |            81 |
| GarageCond    | object       |            81 |
| GarageType    | object       |            81 |
| GarageFinish  | object       |            81 |
| GarageQual    | object       |            81 |
| BsmtFinType2  | object       |            38 |
| BsmtExposure  | object       |            38 |
| BsmtQual      | object       |            37 |
| BsmtCond      | object       |            37 |
| BsmtFinType1  | object       |            37 |
| MasVnrArea    | float64      |             8 |
| MasVnrType    | object       |             8 |
| Electrical    | ob

| Features | DataType | Null Values | Description |
|:--------------|:--------|-----:| :-----------|
| PoolQC        | object  | 1453 |
| MiscFeature   | object  | 1406 |
| Alley         | object  | 1369 |
| Fence         | object  | 1179 |
| FireplaceQu   | object  |  690 |
| LotFrontage   | float64 |  259 |
| GarageYrBlt   | float64 |   81 |
| GarageCond    | object  |   81 |
| GarageType    | object  |   81 |
| GarageFinish  | object  |   81 |
| GarageQual    | object  |   81 |
| BsmtFinType2  | object  |   38 |
| BsmtExposure  | object  |   38 |
| BsmtQual      | object  |   37 |
| BsmtCond      | object  |   37 |
| BsmtFinType1  | object  |   37 |
| MasVnrArea    | float64 |    8 |
| MasVnrType    | object  |    8 |
| Electrical    | object  |    1 |
| Id            | int64   |    0 |
| Functional    | object  |    0 |
| Fireplaces    | int64   |    0 |
| KitchenQual   | object  |    0 |
| KitchenAbvGr  | int64   |    0 |
| BedroomAbvGr  | int64   |    0 |
| HalfBath      | int64   |    0 |
| FullBath      | int64   |    0 |
| BsmtHalfBath  | int64   |    0 |
| TotRmsAbvGrd  | int64   |    0 |
| GarageCars    | int64   |    0 |
| GrLivArea     | int64   |    0 |
| GarageArea    | int64   |    0 |
| PavedDrive    | object  |    0 |
| WoodDeckSF    | int64   |    0 |
| OpenPorchSF   | int64   |    0 |
| EnclosedPorch | int64   |    0 |
| 3SsnPorch     | int64   |    0 |
| ScreenPorch   | int64   |    0 |
| PoolArea      | int64   |    0 |
| MiscVal       | int64   |    0 |
| MoSold        | int64   |    0 |
| YrSold        | int64   |    0 |
| SaleType      | object  |    0 |
| SaleCondition | object  |    0 |
| BsmtFullBath  | int64   |    0 |
| HeatingQC     | object  |    0 |
| LowQualFinSF  | int64   |    0 |
| LandSlope     | object  |    0 |
| OverallQual   | int64   |    0 |
| HouseStyle    | object  |    0 |
| BldgType      | object  |    0 |
| Condition2    | object  |    0 |
| Condition1    | object  |    0 |
| Neighborhood  | object  |    0 |
| LotConfig     | object  |    0 |
| YearBuilt     | int64   |    0 |
| Utilities     | object  |    0 |
| LandContour   | object  |    0 |
| LotShape      | object  |    0 |
| Street        | object  |    0 |
| LotArea       | int64   |    0 |
| MSZoning      | object  |    0 |
| OverallCond   | int64   |    0 |
| YearRemodAdd  | int64   |    0 |
| 2ndFlrSF      | int64   |    0 |
| BsmtFinSF2    | int64   |    0 |
| 1stFlrSF      | int64   |    0 |
| CentralAir    | object  |    0 |
| MSSubClass    | int64   |    0 |
| Heating       | object  |    0 |
| TotalBsmtSF   | int64   |    0 |
| BsmtUnfSF     | int64   |    0 |
| BsmtFinSF1    | int64   |    0 |
| RoofStyle     | object  |    0 |
| Foundation    | object  |    0 |
| ExterCond     | object  |    0 |
| ExterQual     | object  |    0 |
| Exterior2nd   | object  |    0 |
| Exterior1st   | object  |    0 |
| RoofMatl      | object  |    0 |
| SalePrice     | int64   |    0 |


* Visualize Missing Values

In [117]:
import plotly.express as px

fig = px.bar(df_dt_null, x= 'Features', y= 'Null Values', color= 'Null Values', color_continuous_scale=px.colors.sequential.Plasma_r,height=400,)
fig.show()

* Heatmap of Missing Values

In [120]:
fig_heatmap = px.imshow(df_dt_null)
fig_heatmap.show()

TypeError: Object of type dtype[object_] is not JSON serializable

# 2. 