In [9]:
import pandas as pd

# Series Example: a one-dimensional labelled array, built here from a
# label -> value mapping (labels 'a', 'b', 'c' become the index).
s = pd.Series({'a': 10, 'b': 20, 'c': 30})
# head() returns at most the first five rows, so all three entries are shown.
series_preview = s.head()
print("Series Example:\n", series_preview, "\n")


Series Example:
 a    10
b    20
c    30
dtype: int64 



In [10]:
# DataFrame Example: a two-row table built column-by-column from a dict of
# equal-length lists; each dict key becomes a column label.
df_example = pd.DataFrame({
    'name': ['Lucy', 'Silver'],
    'age': [18, 23],
    'hobbies': ['reading', 'music'],  # label spelled 'hobbies' (not 'hobies')
})
print("Example DataFrame:\n", df_example, "\n")


Example DataFrame:
      name  age   hobies
0    Lucy   18  reading
1  Silver   23    music 



In [11]:
import os
import pandas as pd

# Location of SeoulBikeData.csv. Set the SEOUL_BIKE_CSV environment variable to
# point at your own copy; when it is unset, the original hard-coded location
# below is used, so existing setups keep working unchanged.
csv_path = os.environ.get(
    "SEOUL_BIKE_CSV",
    r"C:\Users\welcome\Documents\SeoulBikeData.csv",  # <-- change path
)

# Fail early with a clear message before handing the path to pandas.
# isfile() (rather than exists()) also rejects a directory, which would
# otherwise pass this check and only fail later inside read_csv.
if not os.path.isfile(csv_path):
    raise FileNotFoundError(f"File not found at: {csv_path}\nPlease update the path above.")

# Load Seoul Bike Dataset.
# NOTE(review): ISO-8859-1 is presumably needed because the header contains a
# degree sign ("Temperature(°C)") that is not UTF-8 encoded in the file - confirm.
df = pd.read_csv(csv_path, encoding="ISO-8859-1")
# Preview only the first 15 rows rather than dumping the whole frame.
print("First 15 rows:\n", df.head(15), "\n")


First 15 rows:
           Date  Rented Bike Count  Hour  Temperature(°C)  Humidity(%)  \
0   01/12/2017                254     0             -5.2           37   
1   01/12/2017                204     1             -5.5           38   
2   01/12/2017                173     2             -6.0           39   
3   01/12/2017                107     3             -6.2           40   
4   01/12/2017                 78     4             -6.0           36   
5   01/12/2017                100     5             -6.4           37   
6   01/12/2017                181     6             -6.6           35   
7   01/12/2017                460     7             -7.4           38   
8   01/12/2017                930     8             -7.6           37   
9   01/12/2017                490     9             -6.5           27   
10  01/12/2017                339    10             -3.5           24   
11  01/12/2017                360    11             -0.5           21   
12  01/12/2017                449  

In [12]:
# Fill missing values: compute the mean of every numeric column, then use
# those means as the per-column replacement for NaN. Columns without an entry
# in `numeric_means` (the non-numeric ones) are left as they are.
numeric_means = df.mean(numeric_only=True)
df = df.fillna(value=numeric_means)


In [13]:
# Rename columns: replace the spaces in "Rented Bike Count" with underscores.
# rename() returns a new frame, which is bound back to `df`; this matches the
# `df = df.fillna(...)` reassignment style used for the missing-value step and
# avoids mutating the frame in place.
df = df.rename(columns={"Rented Bike Count": "Rented_Bike_Count"})


In [14]:
# Change datatype: store the rental counts as 64-bit floats.
rental_counts = df["Rented_Bike_Count"]
df["Rented_Bike_Count"] = rental_counts.astype("float64")


In [15]:
# Sort values: coldest rows first; within the same temperature, the rows with
# the highest rental count come first. sort_values() returns a new frame that
# is bound back to `df` (no in-place mutation), consistent with the
# reassignment style of the missing-value step. The original row labels are
# kept, so the index is no longer in ascending order after this cell.
df = df.sort_values(
    by=["Temperature(°C)", "Rented_Bike_Count"],
    ascending=[True, False],
)


In [16]:
# Apply function to double the count: Series.apply calls the helper once per
# element and collects the results into a new column.
def _double_count(count):
    """Return twice the given rental count."""
    return count * 2


df["Rented_Bike_Count_Double"] = df["Rented_Bike_Count"].apply(_double_count)


In [17]:
# GroupBy single column: split the rental counts by hour of day, then take
# the mean of each group. The result is a Series indexed by "Hour".
rentals_by_hour = df.groupby("Hour")["Rented_Bike_Count"]
store_group = rentals_by_hour.mean()
print("Average rentals by Hour:\n", store_group, "\n")


Average rentals by Hour:
 Hour
0      541.460274
1      426.183562
2      301.630137
3      203.331507
4      132.591781
5      139.082192
6      287.564384
7      606.005479
8     1015.701370
9      645.983562
10     527.821918
11     600.852055
12     699.441096
13     733.246575
14     758.824658
15     829.186301
16     930.621918
17    1138.509589
18    1502.926027
19    1195.147945
20    1068.964384
21    1031.449315
22     922.797260
23     671.126027
Name: Rented_Bike_Count, dtype: float64 



In [18]:
# GroupBy multiple columns: one group per (Hour, Seasons) pair; summing the
# rental counts yields a Series with a two-level (Hour, Seasons) index.
group_keys = ["Hour", "Seasons"]
rentals_by_hour_season = df.groupby(group_keys)["Rented_Bike_Count"]
multi_group = rentals_by_hour_season.sum()
print("Total rentals by Hour and Season:\n", multi_group, "\n")


Total rentals by Hour and Season:
 Hour  Seasons
0     Autumn      56755.0
      Spring      43298.0
      Summer      82714.0
      Winter      14866.0
1     Autumn      44200.0
                   ...   
22    Winter      20262.0
23    Autumn      67127.0
      Spring      56066.0
      Summer     106159.0
      Winter      15609.0
Name: Rented_Bike_Count, Length: 96, dtype: float64 



In [19]:
# Aggregation: a different statistic per column within each season -
# the mean rental count and the maximum temperature.
season_stats = {
    "Rented_Bike_Count": "mean",
    "Temperature(°C)": "max",
}
by_season = df.groupby("Seasons")
agg_df = by_season.agg(season_stats)
print("Aggregated data:\n", agg_df, "\n")


Aggregated data:
          Rented_Bike_Count  Temperature(°C)
Seasons                                    
Autumn          819.597985             30.5
Spring          730.031250             29.4
Summer         1034.073370             39.4
Winter          225.541204             10.3 



In [20]:
# Date conversion: parse the "Date" column into datetime values. The explicit
# day/month/year format prevents e.g. 01/12/2017 from being read month-first.
day_first_format = "%d/%m/%Y"
df["Date"] = pd.to_datetime(df["Date"], format=day_first_format)


In [21]:
# Create small DataFrame for merge: a lookup table that maps a few hours of
# the day to a descriptive category. Only hours 0, 1 and 2 are listed.
hour_categories = [
    (0, "Late Night"),
    (1, "Early Morning"),
    (2, "Early Morning"),
]
prod_info = pd.DataFrame(hour_categories, columns=["Hour", "Category"])


In [22]:
# Merge: left-join the hour -> category lookup onto the bike data. Every row
# of `df` is kept; rows whose Hour has no entry in `prod_info` get a missing
# "Category" value.
# NOTE(review): the later cells visible here (pivot table, save) read `df`,
# not `merged_df` - confirm whether the merged frame is meant to be used.
merged_df = pd.merge(df, prod_info, how="left", on="Hour")


In [23]:
# Pivot table: hours as rows, seasons as columns, and each cell holding the
# summed rental count for that (hour, season) combination.
pivot_df = df.pivot_table(
    values="Rented_Bike_Count",
    index="Hour",
    columns="Seasons",
    aggfunc="sum",
)
print("Pivot Table:\n", pivot_df, "\n")


Pivot Table:
 Seasons    Autumn    Spring    Summer   Winter
Hour                                          
0         56755.0   43298.0   82714.0  14866.0
1         44200.0   32755.0   64287.0  14315.0
2         30198.0   22767.0   46529.0  10601.0
3         20524.0   15163.0   31526.0   7003.0
4         13522.0    9740.0   20591.0   4543.0
5         13073.0   10456.0   22626.0   4610.0
6         28759.0   23151.0   44697.0   8354.0
7         63899.0   55376.0   83056.0  18861.0
8        108948.0   93274.0  130511.0  37998.0
9         68723.0   60335.0   83812.0  22914.0
10        57937.0   51380.0   66562.0  16776.0
11        65039.0   61332.0   72391.0  20549.0
12        75706.0   75274.0   80589.0  23727.0
13        81216.0   79748.0   81898.0  24773.0
14        85765.0   83310.0   82310.0  25586.0
15        93693.0   89188.0   92894.0  26878.0
16       104750.0   99131.0  108024.0  27772.0
17       126593.0  117718.0  140419.0  30826.0
18       160650.0  152038.0  196433.0  39447.0

In [24]:
# Save results: write the processed frame to the current working directory as
# both CSV and Excel. index=False leaves the row labels out of the files.
csv_out = "seoulbike_processed.csv"
excel_out = "seoulbike_processed.xlsx"

df.to_csv(csv_out, index=False)
df.to_excel(excel_out, index=False)

print("Processing complete. Files saved.")


Processing complete. Files saved.
