## Config

In [None]:
# parameters
# Default run settings. YEAR and SAVE_OUTPUT are assigned again by the next cell.
YEAR = 2017              # year prefix of the input file to process
SAVE_OUTPUT = True       # when True, the cleaned stations table is written to parquet
SIZE_PLOTS = (12, 10)    # NOTE(review): presumably a figure size; not used in the visible cells

# Data folders, given relative to the notebook's working directory
INPUT_DATA_PATH = "../data/raw/"
OUTPUT_DATA_PATH = "../data/interim/"
INPUT_PROCESSED_DATA_PATH = "../data/processed/"

In [None]:
# Parameters
# Values for this particular run; they replace the defaults set in the cell above.
# NOTE(review): looks like a cell injected by a parameterization tool (e.g. papermill) - confirm.
SAVE_OUTPUT = True
YEAR = 2018


In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import geoarrow.pandas as _
import arrow
from pathlib import Path
import json
import csv

## Load data

In [None]:
# Read the counter description file of the selected year.
# The path is built with pathlib because INPUT_DATA_PATH already ends with "/",
# so plain string concatenation would produce a doubled separator.
stations_csv_path = Path(INPUT_DATA_PATH) / f"{YEAR}_aforament_descripcio.csv"

# Malformed rows are still dropped, but 'warn' reports each of them instead of
# discarding them silently; the row count of this frame is logged later on,
# so unnoticed losses would distort that figure.
df = pd.read_csv(stations_csv_path, sep=",", on_bad_lines="warn")

# Turn the longitude/latitude columns into point geometries and declare them as EPSG:4326.
stations_data = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitud, df.Latitud))
stations_data = stations_data.set_crs(epsg=4326)

## Data management

In [None]:
# Structural overview: the (rows, columns) tuple and the column index, one per line.
print(stations_data.shape, stations_data.columns, sep="\n")
# Last expression of the cell, so the notebook renders the first rows.
stations_data.head()

In [None]:
# Display the dtype of every column, including the geometry column built when loading.
stations_data.dtypes

## Select Bicycle Counters

In [None]:
# Frequency of each value in Desc_tipus_aforament; dropna=False keeps missing values as their own row.
stations_data["Desc_tipus_aforament"].value_counts(dropna=False)

In [None]:
# Keep only the bicycle counters.
# The label was previously matched as "bicis" for 2017 and "Bicis" for every other
# year; comparing a case-folded copy of the column covers both spellings without
# branching on YEAR. astype(str) turns missing values into "nan", which never matches.
counter_type_label = stations_data["Desc_tipus_aforament"].astype(str).str.casefold()
bike_stations_data = stations_data[counter_type_label == "bicis"].copy()

In [None]:
# Remove the coordinate and counter-type columns from the bicycle subset.
# Longitud/Latitud are already encoded in the geometry column; the type columns
# carry no information once only bicycle counters remain.
# NOTE(review): X_ETRS89/Y_ETRS89 are presumably projected coordinates - confirm.
bike_stations_data_clean = bike_stations_data.drop(
    labels=[
        'Longitud',
        'Latitud',
        'X_ETRS89',
        'Y_ETRS89',
        'Codi_tipus_aforament',
        'Desc_tipus_aforament',
    ],
    axis="columns",
).copy()
bike_stations_data_clean.head()


## Save output

In [None]:
if SAVE_OUTPUT:
    # Create the output folder when it is missing so the write cannot fail on a
    # fresh checkout, and join with pathlib because OUTPUT_DATA_PATH already ends with "/".
    output_dir = Path(OUTPUT_DATA_PATH)
    output_dir.mkdir(parents=True, exist_ok=True)
    bike_stations_data_clean.to_parquet(output_dir / f"stations_data{YEAR}.parquet")

In [None]:
# Summary of this run, stored as one row of the per-year stations log.
new_row = {
    'Year': YEAR,
    'Columns': stations_data.shape[1],
    'Counters': stations_data.shape[0],
    'Bike Counters': bike_stations_data.shape[0],
    # Every counter that is not a bicycle counter is counted here, whatever its type.
    'Car Counters': stations_data.shape[0]-bike_stations_data.shape[0]
}

# One-row DataFrame holding the summary
new_row_df = pd.DataFrame([new_row])

# Location of the log file
csv_file_path = f"{INPUT_PROCESSED_DATA_PATH}/log_stations.csv"
log_file = Path(csv_file_path)

# Make sure the target folder exists before writing.
log_file.parent.mkdir(parents=True, exist_ok=True)

if log_file.is_file() and log_file.stat().st_size > 0:
    # Replace any earlier entry for this year instead of appending blindly, so that
    # re-running the notebook for the same YEAR does not pile up duplicate rows.
    logged_rows = pd.read_csv(log_file)
    other_years = logged_rows[logged_rows["Year"] != YEAR]
    # Empty frames are left out of the concat so they cannot influence the result dtypes.
    stations_log = pd.concat(
        [frame for frame in (other_years, new_row_df) if not frame.empty],
        ignore_index=True,
    )
else:
    # No log yet (or an empty file): start it with this row.
    stations_log = new_row_df

# Always write the header, so the file stays readable as a regular CSV table.
stations_log.to_csv(log_file, index=False)

## Watermark

In [None]:
!python -m pip install watermark --quiet

In [None]:
# Register the watermark extension so the %watermark magic is available in the cells below.
%load_ext watermark

In [None]:
# Print the extension's default report (per the watermark docs: timestamp, Python/IPython versions, system info).
%watermark

In [None]:
# Per the watermark docs, --iversions lists the versions of the modules imported in this session.
%watermark --iversions

In [None]:
# Shell out to report the operating-system release of the machine running the kernel.
# NOTE(review): lsb_release is typically only present on Linux distributions - confirm target platform.
!lsb_release -a