In [1]:
#@title Install dependencies
from IPython.display import clear_output, display, Image

clear_output()

### Imports

In [26]:
import os
import subprocess
import pandas as pd
import re
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import networkx as nx
import plotly.express as px
import streamlit as st

clear_output()

In [27]:
# Load the merged YOLO + RAM tag table, then discard its first column
# (presumably the index column written by an earlier to_csv -- confirm).
df = pd.read_csv("data/YOLO+RAM_merged.csv")
df = df.drop(columns=df.columns[0])

In [28]:
df.head()

Unnamed: 0,Image,RAM_Tags,YOLO_Tags,location,locationID,videoID,aggID
0,84.jpg,car | city street | crack | crosswalk | curb |...,"1 person, 15 car, 1 traffic light",park,1,1,1100084
1,98.jpg,car | city street | crack | crosswalk | curb |...,14 car,park,1,1,1100098
2,112.jpg,black | car | city street | crack | crosswalk ...,"1 person, 8 car, 1 traffic light",park,1,1,1100112
3,126.jpg,car | city street | crack | crosswalk | curb |...,"1 person, 13 car, 1 traffic light",park,1,1,1100126
4,140.jpg,car | city street | crack | crosswalk | curb |...,"1 person, 9 car, 1 traffic light",park,1,1,1100140


### Co-occurrence Matrix

In [29]:
# Collect the vocabulary: every distinct tag found in any RAM_Tags cell.
# dropna() skips rows without tags -- an empty CSV cell is read as NaN,
# which is a float and has no .split().
all_tags = set()
for tags in df['RAM_Tags'].dropna():
    all_tags.update(tags.split(' | '))
all_tags = sorted(all_tags)

# Creating co-occurrence matrices
# Zero-filled square template (tags x tags) that each location copies.
# Passing 0 to the constructor yields an integer frame directly; building an
# empty (all-NaN, object dtype) frame and calling fillna(0) relies on pandas
# downcasting object columns, which is deprecated in recent pandas releases.
co_occurrence_overall = pd.DataFrame(0, index=all_tags, columns=all_tags)

# Function to update co-occurrence matrices
def update_co_occurrence(df, co_occurrence_matrix):
    """Add pairwise tag co-occurrence counts from ``df`` into a matrix, in place.

    Each ``RAM_Tags`` value is split on ``' | '``. For every pair of positions
    (i < j) in the resulting list, both symmetric cells
    ``[tags[i], tags[j]]`` and ``[tags[j], tags[i]]`` are incremented by 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``RAM_Tags`` column of ``' | '``-separated tag strings.
    co_occurrence_matrix : pandas.DataFrame
        Square frame indexed and labelled by tag; mutated in place.

    Returns
    -------
    None

    Notes
    -----
    Values that are not strings (for example NaN produced by an empty CSV
    cell) are skipped instead of raising ``AttributeError`` on ``.split``.
    """
    # Only the tag column is needed, so iterate it directly rather than
    # materialising every row with iterrows().
    for tags_field in df['RAM_Tags']:
        if not isinstance(tags_field, str):
            continue  # missing / non-text tag cell: nothing to count
        tags = tags_field.split(' | ')
        for i, first in enumerate(tags):
            for second in tags[i + 1:]:
                # Keep the matrix symmetric by updating both orientations.
                co_occurrence_matrix.at[first, second] += 1
                co_occurrence_matrix.at[second, first] += 1


# One (location, normalized matrix) pair per location, in groupby order
co_occurrence_overall_total = []

# Build, normalize and save a co-occurrence matrix for every location
for location, df_location in df.groupby('location'):
    # Start from a fresh copy of the zeroed template, then count tag pairs
    co_occurrence_grouped = co_occurrence_overall.copy()
    update_co_occurrence(df_location, co_occurrence_grouped)

    # Divide the raw pair counts by the number of rows at this location
    co_occurrence_grouped /= len(df_location)
    co_occurrence_overall_total.append((location, co_occurrence_grouped))

    # Persist this location's matrix as data/<location>_cooccurrence.csv
    co_occurrence_grouped.to_csv(f"data/{location}_cooccurrence.csv")

# Unweighted mean of the per-location matrices (each location counts equally)
co_occurrence_overall = (
    sum(matrix for _, matrix in co_occurrence_overall_total)
    / len(co_occurrence_overall_total)
)

# Persist the averaged matrix
co_occurrence_overall.to_csv("data/total_cooccurrence.csv")

In [20]:
# Top N co-occurring pairs
def top_n_cooccur_pairs(df, location, top_n):
    """Return the ``top_n`` highest-valued (Term1, Term2) cells of a matrix.

    The frame is stacked into long form with columns ``Term1``, ``Term2`` and
    ``Co-occurrence``. Only rows where ``Term1 < Term2`` are kept, which drops
    the diagonal and, for a symmetric matrix, the mirrored duplicate of each
    pair. A header line naming ``location`` (capitalized) is printed before
    the result is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Co-occurrence matrix (assumed to have comparable, string-like labels
        on both axes).
    location : str
        Name used in the printed header.
    top_n : int
        Number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        The top rows, sorted by ``Co-occurrence`` descending.
    """
    long_form = df.stack().reset_index()
    long_form.columns = ['Term1', 'Term2', 'Co-occurrence']

    # Keep a single orientation of every pair
    one_direction = long_form['Term1'].lt(long_form['Term2'])
    ranked = long_form.loc[one_direction].sort_values(
        by='Co-occurrence', ascending=False
    )
    top_pairs = ranked.head(top_n)

    print(f"Top Co-occurring Pairs in {location.capitalize()}:")

    return top_pairs


# Top N tags by overall co-occurrences
def top_n_common_item(df, location, top_n):
    """Print the ``top_n`` row labels of ``df`` ranked by their row sums.

    Each row of ``df`` is summed across its columns, the sums are sorted in
    descending order, and the first ``top_n`` are printed beneath a header
    naming ``location`` (capitalized). Nothing is returned.
    """
    row_totals = df.sum(axis=1)
    ranked_totals = row_totals.sort_values(ascending=False)
    top_n_tags = ranked_totals.head(top_n)

    header = f"\nTop Tags by Overall Co-occurrences in {location.capitalize()}:"
    print(header)
    print(top_n_tags)

In [24]:
# Tags treated as referring to people; used to pick the person-related
# columns out of a co-occurrence matrix row.
person_associated_words = [
    'baby', 'boy', 'businessman', 'child',
    'construction worker', 'couple', 'daughter', 'girl',
    'man', 'mother', 'nun', 'officer',
    'pedestrian', 'person', 'protester', 'runner',
    'skater', 'skateboarder', 'student', 'woman',
]

## Chase
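Chase has a higher people-to-car interaction than any other location: in the outputs below, the summed co-occurrence of `car` with person-related tags is about 1.60 for Chase, versus about 0.42 for Dumbo and 0.12 for Park.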

In [16]:
chase_df = pd.read_csv('data/chase_cooccurrence.csv', index_col=0)

In [22]:
top_n_cooccur_pairs(chase_df, 'chase', 30) 

Top Co-occurring Pairs in Chase:


Unnamed: 0,Term1,Term2,Co-occurrence
18195,city street,road,0.925821
61245,road,street corner,0.915931
18240,city street,street corner,0.879901
21550,crosswalk,road,0.806605
13620,car,road,0.790533
18065,city street,crosswalk,0.790004
51135,pavement,road,0.787178
13479,car,city street,0.771106
21245,cross,road,0.770399
52050,person,road,0.767573


In [37]:
top_n_common_item(co_occurrence_overall_total[0][1], co_occurrence_overall_total[0][0], 10) 


Top Tags by Overall Co-occurrences in Chase:
road             11.256446
city street      10.754327
street corner    10.655422
crosswalk         9.392264
car               9.256093
pavement          9.132109
person            9.070293
cross             9.066937
intersection      8.658778
walk              8.431473
dtype: float64


In [25]:
chase_df.loc['car', person_associated_words].sum()

1.5990815966089715

## Dumbo

In [38]:
top_n_cooccur_pairs(co_occurrence_overall_total[1][1], co_occurrence_overall_total[1][0], 10) 

Top Co-occurring Pairs in Dumbo:
crosswalk     road            0.992908
intersection  road            0.783688
crosswalk     intersection    0.782270
city street   road            0.732624
              crosswalk       0.726950
              intersection    0.707801
city          road            0.705674
              crosswalk       0.702128
              city street     0.679433
              intersection    0.672340
dtype: float64


In [39]:
top_n_common_item(co_occurrence_overall_total[1][1], co_occurrence_overall_total[1][0], 10) 


Top Tags by Overall Co-occurrences in Dumbo:
road             8.933333
crosswalk        8.866667
intersection     7.141844
city street      6.722695
city             6.456738
car              5.342553
cross            5.060284
street sign      4.941844
person           4.456028
street corner    4.193617
dtype: float64


In [33]:
co_occurrence_overall_total[1][1].loc['car', person_associated_words].sum()

0.4191489361702127

In [60]:
# Write the matrix stored at index 1 of co_occurrence_overall_total to CSV.
# NOTE(review): the groupby loop that builds co_occurrence_overall_total
# already saves data/{location}_cooccurrence.csv for every location, so this
# looks redundant if that location's name is exactly "dumbo" -- confirm.
co_occurrence_overall_total[1][1].to_csv("data/dumbo_cooccurrence.csv")

## Park

In [40]:
top_n_cooccur_pairs(co_occurrence_overall_total[2][1], co_occurrence_overall_total[2][0], 10) 

Top Co-occurring Pairs in Park:
intersection  road             0.837262
drain         road             0.833520
city street   road             0.823045
drain         intersection     0.821549
city street   intersection     0.819678
              drain            0.819304
crack         road             0.817434
road          street corner    0.817434
car           road             0.816685
              intersection     0.813318
dtype: float64


In [41]:
top_n_common_item(co_occurrence_overall_total[2][1], co_occurrence_overall_total[2][0], 10) 


Top Tags by Overall Co-occurrences in Park:
road             9.146652
intersection     8.928545
drain            8.888515
city street      8.783389
crack            8.706697
car              8.695847
street corner    8.684250
curb             8.607183
manhole cover    8.168350
crosswalk        7.940890
dtype: float64


In [30]:
co_occurrence_overall_total[2][1].loc['car', person_associated_words].sum()

0.12345679012345678

In [1]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.graph_objects as go
from process import update_co_occurrence, create_filtered_word_counts_df, get_word_counts_and_co_occurrence
from plots import create_graph_from_co_occurrence, plot_network, get_tag_data_without_label, plot_tag_counts_by_location

# Load your data
# NOTE: this cell rebinds `df` and `co_occurrence_overall`, which are also
# assigned by earlier cells of this notebook.
file_path = 'data/'
df = pd.read_csv(file_path + 'YOLO+RAM_merged.csv').drop(columns=['Unnamed: 0'])

# Helper imported from process.py; the meaning of the five returned values
# is not visible here -- see that module.
(
    word_counts_df_filtered,
    co_occurrence_overall,
    words_cps_1,
    words_cps_2,
    words_cps_3,
) = get_word_counts_and_co_occurrence(df)

In [2]:
word_counts_df_filtered

Unnamed: 0,Tag,RAM_Count
0,ambulance,20.333333
1,baby,8.000000
2,baby carriage,34.666667
3,backpack,6.333333
4,bag,38.666667
...,...,...
161,white,219.333333
162,woman,698.666667
163,writing,1.666667
164,yellow,23.666667
