In [None]:
import plotly.graph_objects as go
import pandas as pd
import networkx as nx

# Load the edge list; the graph construction below expects 'source',
# 'target' and 'value' columns.
df = pd.read_csv('board_games.csv')

# Build the graph, carrying 'value' along as an edge attribute
G = nx.from_pandas_edgelist(df, 'source', 'target', ['value'])

# Force-directed layout. spring_layout starts from random positions, so the
# seed is pinned to get the same picture on every Restart & Run All.
pos = nx.spring_layout(G, seed=42)

# Look up every node's degree once; it drives marker size, colour and hover text
graph_nodes = list(G.nodes())
node_degree = {node: G.degree[node] for node in graph_nodes}

# Node trace
node_trace = go.Scatter(
    x=[pos[node][0] for node in graph_nodes],
    y=[pos[node][1] for node in graph_nodes],
    mode='markers',
    marker=dict(
        size=[10 + 5 * node_degree[node] for node in graph_nodes],  # Grows with degree
        color=[node_degree[node] for node in graph_nodes],  # Coloured by degree
        colorscale='Viridis',
        colorbar=dict(title='Node Connections')
    ),
    text=[f"Node: {node}, Degree: {node_degree[node]}" for node in graph_nodes],
    hoverinfo='text'
)

# Edge coordinates: each edge contributes (start, end, None); the None breaks
# the line so consecutive edges are not joined together. Collect them in plain
# lists first instead of repeatedly re-assigning a growing tuple on the trace.
# NOTE: every edge is drawn identically; the 'value' attribute is not used here.
edge_x, edge_y = [], []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

# Edge trace
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=2, color='gray'),  # Constant line width
    hoverinfo='none',
    mode='lines'
)

# Create the figure (edges first so the node markers are drawn on top)
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='Force-Directed Graph',
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False))
                )

# Show the figure
fig.show()


In [4]:
import pandas as pd

# Read the station lookup table and the raw trips
stations_df = pd.read_csv('toronto/stations.csv')
trips_df = pd.read_csv('toronto/trips.csv')

# Lookup table: station code -> station name
station_code_to_name = {
    code: name
    for code, name in zip(stations_df['code'], stations_df['name'])
}

# Add a name column for each end of the trip, then discard the code columns.
# Codes that are absent from the lookup table come out as NaN.
trips_df = (
    trips_df
    .assign(
        start_station_name=lambda trips: trips['start_station_code'].map(station_code_to_name),
        end_station_name=lambda trips: trips['end_station_code'].map(station_code_to_name),
    )
    .drop(columns=['start_station_code', 'end_station_code'])
)

# Write the result next to the notebook
trips_df.to_csv('trips_with_station_names.csv', index=False)

print("New CSV file created with station names instead of codes.")


New CSV file created with station names instead of codes.


In [5]:
import pandas as pd

# Load the trips table (expects 'start_station_name' and 'end_station_name' columns)
df = pd.read_csv('trips_with_station_names.csv')

# Edges: number of trips for every (start station, end station) pair
edges = df.groupby(['start_station_name', 'end_station_name']).size().reset_index(name='trip_count')

# Per-station counts of trips that start there and trips that end there
start_stations = df.groupby('start_station_name').size().reset_index(name='start_count')
end_stations = df.groupby('end_station_name').size().reset_index(name='end_count')

# Outer-merge the two counts on a shared 'station_name' key.
# Merging with left_on/right_on keeps two separate key columns, so a station
# that only ever appears as a destination would be left with a missing name
# once only the start-side column is kept. A single shared key avoids that.
nodes = pd.merge(
    start_stations.rename(columns={'start_station_name': 'station_name'}),
    end_stations.rename(columns={'end_station_name': 'station_name'}),
    on='station_name',
    how='outer',
)

# A station absent from one side has NaN for that count; treat it as zero
nodes['total_count'] = nodes['start_count'].fillna(0) + nodes['end_count'].fillna(0)

# Keep only station name and total count
nodes = nodes[['station_name', 'total_count']]

# Save the results to CSV files
edges.to_csv('aggregated_edges.csv', index=False)
nodes.to_csv('aggregated_nodes.csv', index=False)
