In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import matplotlib as mpl
from glob import glob

In [None]:
# Global matplotlib text settings:
#   - "Malgun Gothic" as the font family (presumably chosen so the Korean
#     plot titles used later render; "D2Coding" was an alternative tried here).
#   - unicode_minus=False so negative tick labels use the ASCII hyphen.
plt.rcParams.update({
    "font.family": "Malgun Gothic",
    "axes.unicode_minus": False,
})

In [None]:
# Collect every path under ./data/bike/.
# glob() yields paths in arbitrary, filesystem-dependent order, and this list
# is indexed by position further down, so sort it to make that lookup
# reproducible across machines and runs.
file_list = sorted(glob("./data/bike/*"))
# Show how many paths were found, then the paths themselves on the next line.
print(len(file_list), file_list, sep="\n")

In [None]:
# Load the third path in file_list as the working frame, parsing the
# "datetime" column into timestamps while reading.
# NOTE(review): the file is chosen purely by its position in file_list —
# confirm that index 2 is the intended CSV on every machine.
train = pd.read_csv(file_list[2], parse_dates=["datetime"])
# Display (rows, columns) of the loaded frame.
train.shape

In [None]:
# Print a concise summary of the frame: columns, dtypes and non-null counts.
train.info()

In [None]:
# Preview the first rows of the frame.
train.head()

In [None]:
# Apply seaborn's default theme with fonts scaled to 0.9.
# set_theme() (and its alias set()) overwrites matplotlib's "font.family" with
# seaborn's own default, which would silently discard the font family that was
# configured before this cell and break non-Latin (Korean) plot titles.
# Snapshot the active family and pass it back through `rc`, which seaborn
# applies last, so the previously chosen font survives the theme change.
active_font_family = list(plt.rcParams["font.family"])
sns.set_theme(font_scale=0.9, rc={"font.family": active_font_family})

In [None]:
# Grid of histograms, one per plottable column, on a 12x14 inch figure with
# x tick labels rotated by 30 degrees.
train.hist(figsize=(12, 14), xrot=30)

In [None]:
# Summary statistics for the "temp" column.
train["temp"].describe()

In [None]:
# Number of missing values in each column.
train.isna().sum()

In [None]:
# Peek at the first few values of the "datetime" column.
train["datetime"].head()

In [None]:
# Split the timestamp into calendar components, adding one column per
# component (named after the component) to the frame in place.
timestamp_parts = train["datetime"].dt
for part in ("year", "month", "day", "hour"):
    train[part] = getattr(timestamp_parts, part)

In [None]:
# Show the last rows of the timestamp next to its derived calendar columns
# to eyeball that the extraction is consistent.
train[["datetime", "year", "month", "day", "hour"]].tail()

In [None]:
# 2x2 grid of bar charts of "count", one per calendar component.
figure, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
figure.set_size_inches(12, 10)

# Fill the grid row by row: year, month, day, hour.
for axis, column in zip((ax1, ax2, ax3, ax4), ("year", "month", "day", "hour")):
    sns.barplot(train, x=column, y="count", ax=axis)

# Korean panel titles: rentals by year / by month / by day / by hour.
ax1.set(title="연별 대여량")
ax2.set(title="월별 대여량")
ax3.set(title="일별 대여량")
ax4.set(title="시간별 대여량")

In [None]:
# Summary statistics for the "count" column.
train["count"].describe()

In [None]:
# 2x2 grid of box plots of "count".
fig, axes = plt.subplots(nrows=2, ncols=2)
fig.set_size_inches(12, 10)

# Row-major order: ungrouped (x=None), then grouped by season, hour, workingday.
for axis, group_col in zip(axes.flat, (None, "season", "hour", "workingday")):
    sns.boxplot(train, x=group_col, y="count", ax=axis)

# Korean panel titles: rentals / rentals by season / rentals by hour /
# rentals by working-day status.
axes[0, 0].set(title="대여량")
axes[0, 1].set(title="계절별 대여량")
axes[1, 0].set(title="시간별 대여량")
axes[1, 1].set(title="근무일 여부에 따른 대여량")

In [None]:
# Add the day-of-week number derived from the timestamp
# (pandas encodes Monday as 0 through Sunday as 6).
train["dayofweek"] = train["datetime"].dt.dayofweek
# Display the frame's (rows, columns) after adding the column.
train.shape

In [None]:
# Number of rows for each day-of-week value.
train["dayofweek"].value_counts()

In [None]:
# Five stacked point plots of "count" against "hour".
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(nrows=5)
fig.set_size_inches(12, 24)

# Every panel shares the same data and x/y columns; only the hue grouping
# (none, workingday, dayofweek, weather, season) changes.
hourly_count = {"data": train, "x": "hour", "y": "count"}
sns.pointplot(**hourly_count, ax=ax1)
sns.pointplot(**hourly_count, hue="workingday", ax=ax2)
sns.pointplot(**hourly_count, hue="dayofweek", ax=ax3)
sns.pointplot(**hourly_count, hue="weather", ax=ax4)
sns.pointplot(**hourly_count, hue="season", ax=ax5)

In [None]:
# Pairwise correlation matrix of the selected numeric columns.
corr_columns = [
    "temp",
    "atemp",
    "casual",
    "registered",
    "humidity",
    "windspeed",
    "count",
]
corr_matrix_df = train[corr_columns].corr()
corr_matrix_df

In [None]:
# Boolean mask for the heatmap: True hides a cell.
# Hide only the strictly-upper triangle so the diagonal and lower triangle stay
# visible. Building the mask from explicit booleans (rather than from the
# correlation values themselves) means a correlation of exactly 0 in the upper
# triangle is still hidden instead of being treated as "not masked".
mask = np.triu(np.ones(corr_matrix_df.shape, dtype=bool), k=1)

# Annotated heatmap of the correlation matrix, two decimals per cell.
plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix_df,
            cmap="Blues",
            annot=True,
            fmt=".2f",
            mask=mask)

In [None]:
# Three side-by-side scatter plots with a fitted regression line, each showing
# "count" against one weather measurement.
fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3)
fig.set_size_inches(12, 5)

# Shared arguments; only the x column and target axes differ per panel.
count_vs = {"data": train, "y": "count"}
sns.regplot(**count_vs, x="temp", ax=ax1)
sns.regplot(**count_vs, x="windspeed", ax=ax2)
sns.regplot(**count_vs, x="humidity", ax=ax3)

In [None]:
# Number of rows for each "weather" value.
train["weather"].value_counts()

In [None]:
def concat_year_month(datetime):
    """Return the year and month of ``datetime`` joined as ``"<year>-<month>"``.

    The month is not zero-padded, so January 2011 yields ``"2011-1"``.

    Args:
        datetime: Any object exposing ``year`` and ``month`` attributes.

    Returns:
        str: The two values joined by a hyphen.
    """
    return "-".join(str(part) for part in (datetime.year, datetime.month))

# Label every row with its "<year>-<month>" string, computed element by element
# from the timestamp column, then preview the result beside the source column.
train["year month"] = train["datetime"].map(concat_year_month)
train[["datetime", "year month"]].head()

In [None]:
# First figure: "count" by year and by month, side by side.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(12, 5)

for axis, column in ((ax1, "year"), (ax2, "month")):
    sns.barplot(train, x=column, y="count", ax=axis)

# Second figure: "count" per combined year-month label.
# Note that `fig` is rebound to this new figure.
fig, ax3 = plt.subplots()
fig.set_size_inches(12, 5)

sns.barplot(train, x="year month", y="count", ax=ax3)
# plt.xticks targets the current axes, i.e. the ones just created above.
plt.xticks(rotation=30)