In [19]:
import pandas as pd
import zipfile
import os

# Download the dataset (uncomment the next two lines if you haven't downloaded it yet)
# import kaggle
# !kaggle datasets download -d hmavrodiev/london-bike-sharing-dataset

# Name of the downloaded Kaggle archive and the folder it is unpacked into
zipfile_name = 'london-bike-sharing-dataset.zip'
extracted_folder = 'london_bike_data'

# Unpack every member of the archive into the target folder
with zipfile.ZipFile(zipfile_name, mode='r') as archive:
    archive.extractall(extracted_folder)

# Load the merged CSV from the extracted folder into a DataFrame
csv_path = os.path.join(extracted_folder, "london_merged.csv")
bikes = pd.read_csv(csv_path)

# Print a summary of the raw columns: names, non-null counts and dtypes
bikes.info()

# Lookup tables that translate the numeric weather and season codes into
# readable labels. The keys are ints, but they also match the float64 codes
# read from the CSV, because 1 and 1.0 compare and hash equal in Python.
weather_dict = {
    1: 'Clear',
    2: 'Scattered clouds',
    3: 'Broken clouds',
    4: 'Cloudy',
    7: 'Rain',
    10: 'Rain with thunderstorm',
    26: 'Snowfall',
    # Listed in the dataset's published weather_code categories; without this
    # entry any such row would silently become NaN when mapped.
    94: 'Freezing fog'
}

season_dict = {
    0: 'spring',
    1: 'summer',
    2: 'autumn',
    3: 'winter'
}

# Translate the numeric codes into readable labels. Series.map yields NaN for
# any code that has no entry in the lookup dictionary.
bikes['weather'] = bikes['weather_code'].map(weather_dict)
bikes['season'] = bikes['season'].map(season_dict)

# Divide humidity by 100. NOTE(review): presumably the raw values are on a
# 0-100 scale, so this produces a 0-1 fraction (not a percentage) even though
# the column is renamed to 'humidity_percent' below -- confirm that downstream
# consumers expect a fraction.
bikes['hum'] = bikes['hum'] / 100

# Parse the timestamp strings into datetime values
bikes['timestamp'] = pd.to_datetime(bikes['timestamp'])

# Give the terse source columns descriptive names. Columns that keep their
# name (is_holiday, is_weekend, season) need no entry in the mapping.
bikes = bikes.rename(columns={
    'cnt': 'count',
    't1': 'temp_real_C',
    't2': 'temp_feels_like_C',
    'hum': 'humidity_percent',
    'wind_speed': 'wind_speed_kph',
})

# The holiday/weekend flags are loaded as floats; store them as booleans
bikes = bikes.astype({'is_holiday': bool, 'is_weekend': bool})

# The numeric weather code is superseded by the 'weather' label column
bikes = bikes.drop(columns=['weather_code'])

# Save the DataFrame to a CSV file (Excel is not required for Tableau visualizations)
bikes.to_csv('london_bikes_final.csv', index=False)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17414 entries, 0 to 17413
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   timestamp     17414 non-null  object 
 1   cnt           17414 non-null  int64  
 2   t1            17414 non-null  float64
 3   t2            17414 non-null  float64
 4   hum           17414 non-null  float64
 5   wind_speed    17414 non-null  float64
 6   weather_code  17414 non-null  float64
 7   is_holiday    17414 non-null  float64
 8   is_weekend    17414 non-null  float64
 9   season        17414 non-null  float64
dtypes: float64(8), int64(1), object(1)
memory usage: 1.3+ MB
