## Libraries

In [25]:
# Para visualizar grafos:

import plotly.graph_objects as go
import ipywidgets as widgets
import holoviews as hv

In [26]:
from google.colab import output
output.enable_custom_widget_manager()

In [27]:
!pip install ipycytoscape



In [28]:
import json
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Análisis exploratorio TROUT

In [30]:
import json

json_path = "/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/VI - VAST Challenge 2/TROUT.json"

with open(json_path, 'r', encoding='utf-8') as f:
    trout_data = json.load(f)


In [31]:
links = trout_data.get("links", [])
for i in range(5):
    print(links[i])


{'role': 'plan', 'source': 'marine_life_deck_Meeting_12_Environmental_Impact_Report', 'target': 'marine_life_deck', 'key': 0}
{'role': 'participant', 'sentiment': -0.5, 'reason': 'Prefers resources to be allocated toward the fishing industry.', 'industry': ['tourism'], 'source': 'marine_life_deck_Meeting_12_Environmental_Impact_Report', 'target': 'Teddy Goldstein', 'key': 0}
{'role': 'plan', 'source': 'low_volume_crane_Meeting_8_Report', 'target': 'low_volume_crane', 'key': 0}
{'role': 'participant', 'sentiment': 0.1, 'reason': "Recognizes the crane's benefit to small-scale operations.", 'industry': ['small vessel'], 'source': 'low_volume_crane_Meeting_8_Report', 'target': 'Seal', 'key': 0}
{'role': 'plan', 'source': 'deep_fishing_dock_Meeting_3_Maintenance_Plan', 'target': 'deep_fishing_dock', 'key': 0}


In [32]:
sentiment_records = []

for link in links:
    if link.get('role') == 'participant' and 'sentiment' in link:
        industry_list = link.get('industry')
        industry = industry_list[0] if isinstance(industry_list, list) and industry_list else 'unknown'

        record = {
            "person": link['target'],
            "proposal": link['source'],
            "sentiment": link['sentiment'],
            "industry": industry,
            "reason": link.get('reason', '')
        }
        sentiment_records.append(record)

import pandas as pd
df_sentiment = pd.DataFrame(sentiment_records)
df_sentiment.head()


Unnamed: 0,person,proposal,sentiment,industry,reason
0,Teddy Goldstein,marine_life_deck_Meeting_12_Environmental_Impa...,-0.5,tourism,Prefers resources to be allocated toward the f...
1,Seal,low_volume_crane_Meeting_8_Report,0.1,small vessel,Recognizes the crane's benefit to small-scale ...
2,Teddy Goldstein,deep_fishing_dock_Meeting_3_Maintenance_Plan,,unknown,
3,Industrial Shipping,deep_fishing_dock_Meeting_3_Maintenance_Plan,,unknown,
4,Ed Helpsford,concert_Meeting_7_Discussion,0.5,tourism,Believes concerts can bring community together...


In [33]:
df_sentiment

Unnamed: 0,person,proposal,sentiment,industry,reason
0,Teddy Goldstein,marine_life_deck_Meeting_12_Environmental_Impa...,-0.5,tourism,Prefers resources to be allocated toward the f...
1,Seal,low_volume_crane_Meeting_8_Report,0.1,small vessel,Recognizes the crane's benefit to small-scale ...
2,Teddy Goldstein,deep_fishing_dock_Meeting_3_Maintenance_Plan,,unknown,
3,Industrial Shipping,deep_fishing_dock_Meeting_3_Maintenance_Plan,,unknown,
4,Ed Helpsford,concert_Meeting_7_Discussion,0.5,tourism,Believes concerts can bring community together...
...,...,...,...,...,...
89,Ed Helpsford,affordable_housing_Meeting_8_Feedback,1.0,large vessel,Feels committee should be doing more for blue-...
90,Daughters of Port Grove,statue_john_smoth_Meeting_8_Proposal,1.0,unknown,Historical recognition aligns with tourism int...
91,Teddy Goldstein,deep_fishing_dock_Meeting_3_Maintenance_Plan_D...,,unknown,
92,Industrial Shipping,deep_fishing_dock_Meeting_3_Maintenance_Plan_D...,,unknown,


In [34]:
df_tourism = df_sentiment[df_sentiment['industry'].str.contains('tourism', case=False)]
df_tourism

Unnamed: 0,person,proposal,sentiment,industry,reason
0,Teddy Goldstein,marine_life_deck_Meeting_12_Environmental_Impa...,-0.5,tourism,Prefers resources to be allocated toward the f...
4,Ed Helpsford,concert_Meeting_7_Discussion,0.5,tourism,Believes concerts can bring community together...
9,Ed Helpsford,waterfront_market_Meeting_9_Report_Discussion,1.0,tourism,Strongly supports as it could benefit small ve...
17,Ed Helpsford,waterfront_market_Meeting_7_Benefits_Feasibility,1.0,tourism,Strongly supports as it could benefit small ve...
19,Ed Helpsford,concert_Meeting_16_Road_Closures,0.5,tourism,Believes concerts can bring community together...
21,Teddy Goldstein,expanding_tourist_wharf_Meeting_9_Start_Report...,-0.5,tourism,Seen as prioritizing tourism over fishing-rela...
22,Seal,expanding_tourist_wharf_Meeting_9_Start_Report...,0.1,tourism,"Seems like headache, but if other people are w..."
24,Teddy Goldstein,expanding_tourist_wharf_Meeting_9_Report,-0.5,tourism,Seen as prioritizing tourism over fishing-rela...
25,Seal,expanding_tourist_wharf_Meeting_9_Report,0.1,tourism,"Seems like headache, but if other people are w..."
29,Teddy Goldstein,expanding_tourist_wharf_Meeting_10_Report_Upda...,-0.5,tourism,Seen as prioritizing tourism over fishing-rela...


In [35]:
import plotly.graph_objects as go

fig = go.Figure(go.Bar(
    x=df_tourism['person'],
    y=df_tourism['sentiment'],
    text=df_tourism['reason'],
    hoverinfo='text+y',
    marker_color=df_tourism['sentiment']
))

fig.update_layout(
    title="Sentiment Toward Tourism Topics (from TROUT Dataset)",
    yaxis_title="Sentiment Score",
    xaxis_title="Person",
    yaxis=dict(range=[-1.1, 1.1]),
    template="plotly_white"
)

fig.show()


## TROUT content analysis
We want to determine:

- Do TROUT-affiliated individuals express positive sentiment toward fishing projects?

- Are tourism projects being undervalued or rejected in sentiment?

- Which industries (tourism, fishing, etc.) receive more sentiment overall?

- Is there any pattern of bias or imbalance?

### STEPS
1. Group sentiments by industry.

2. Compute average sentiment per industry.

3. Count the number of opinions expressed per industry.

4. Visualize the balance using a bar chart.

#### 1. Group sentiments by industry

In [36]:
G = nx.DiGraph()

# Add nodes with labels
for node in trout_data['nodes']:
    node_id = node['id']
    label = node.get('label', node_id)
    G.add_node(node_id, label=label, type=node.get('type', 'unknown'))

# Add edges for participant sentiment links
for link in links:
    if link.get('role') == 'participant' and 'sentiment' in link:
        source = link['source']  # usually the meeting or plan
        target = link['target']  # person
        sentiment = link['sentiment']
        G.add_edge(source, target, sentiment=sentiment)


In [37]:
proposals = set()

for link in links:
    if link.get('role') == 'participant' and 'sentiment' in link:
        source = link['source']
        proposals.add(source)

print(f"Number of unique proposals or meetings: {len(proposals)}")


Number of unique proposals or meetings: 71


In [38]:
from collections import defaultdict

grouped_proposals = defaultdict(list)

for name in proposals:
    name_lower = name.lower()
    if any(k in name_lower for k in ["tour", "event", "concert", "cultural", "culture", "tourist", "tourism", "trip", "travel", "walking", "festival", "statue", "park"]):
        group = 'Turismo'
    elif any(k in name_lower for k in ["fish", "fishing", "market", "dock", "crane", "ship", "harbor", "vacuum", "lomark", "himark"]):
        group = 'Pesca'
    else:
        group = 'Otro'
    grouped_proposals[group].append(name)

# Display group counts
for group, items in grouped_proposals.items():
    print(f"{group}: {len(items)} proposals")


Turismo: 26 proposals
Otro: 15 proposals
Pesca: 30 proposals


In [39]:
import plotly.express as px

group_counts = [{'Tema': k, 'Cantidad de reuniones': len(v)} for k, v in grouped_proposals.items()]
df_grouped = pd.DataFrame(group_counts)

fig = px.bar(df_grouped, x='Tema', y='Cantidad de reuniones', title='Cantidad de reuniones por tema (TROUT)', text='Cantidad de reuniones')
fig.show()


Average sentiment with original industries

In [40]:
industry_sentiment_summary = df_sentiment.groupby("industry").agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()
industry_sentiment_summary

Unnamed: 0,industry,average_sentiment,sentiment_count
0,large vessel,0.573529,34
1,small vessel,0.590909,11
2,tourism,0.191304,23
3,unknown,0.171429,14


Average sentiment with created proposal topics

In [41]:
# Map each proposal to its corresponding group
proposal_to_group = {}
for group, proposals_list in grouped_proposals.items():
    for proposal in proposals_list:
        proposal_to_group[proposal] = group

# Add the group label to the sentiment DataFrame
df_sentiment["proposal_group"] = df_sentiment["proposal"].map(proposal_to_group).fillna("Other")

# Group sentiment data by proposal group
group_sentiment_summary = df_sentiment.groupby("proposal_group").agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()

group_sentiment_summary

Unnamed: 0,proposal_group,average_sentiment,sentiment_count
0,Otro,0.47619,21
1,Pesca,0.437931,29
2,Turismo,0.315625,32


Sentiment analysis per person per industry

In [42]:
from collections import defaultdict

# Rebuild the proposal list from the sentiment data
proposals = df_sentiment["proposal"].unique()

# Regenerate grouped_proposals and proposal_to_group
grouped_proposals = defaultdict(list)
proposal_to_group = {}

for name in proposals:
    name_lower = name.lower()
    if any(k in name_lower for k in ["tour", "event", "concert", "cultural", "culture", "tourist", "tourism", "trip", "travel", "walking", "festival", "statue", "park"]):
        group = 'Turismo'
    elif any(k in name_lower for k in ["fish", "fishing", "market", "dock", "crane", "ship", "harbor", "vacuum", "lomark", "himark"]):
        group = 'Pesca'
    # else:
    #     group = 'Otro'
    grouped_proposals[group].append(name)
    proposal_to_group[name] = group

# Add a 'theme' column to df_sentiment
df_sentiment["theme"] = df_sentiment["proposal"].map(proposal_to_group).fillna("Other")

# Group by person and theme
person_theme_sentiment = df_sentiment.groupby(["person", "theme"]).agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()

person_theme_sentiment

Unnamed: 0,person,theme,average_sentiment,sentiment_count
0,Builders Association,Turismo,1.0,4
1,Daughters of Port Grove,Pesca,0.0,2
2,Daughters of Port Grove,Turismo,0.75,4
3,Ed Helpsford,Pesca,0.692308,13
4,Ed Helpsford,Turismo,0.714286,7
5,High Seas Fishing Inc.,Pesca,0.666667,6
6,Industrial Shipping,Pesca,1.0,3
7,Industrial Shipping,Turismo,1.0,3
8,PTA,Turismo,0.0,2
9,Paackland Container Inc.,Turismo,-0.5,2


In [43]:
import plotly.express as px

# Create a pivot table for heatmap: rows=person, columns=theme, values=average_sentiment
heatmap_data = person_theme_sentiment.pivot(index='person', columns='theme', values='average_sentiment').fillna(0)

# Fix compatibility issue by converting the DataFrame to float explicitly
heatmap_data = heatmap_data.astype(float)

# Use plotly.graph_objects as a fallback to avoid compatibility issues
import plotly.graph_objects as go

fig = go.Figure(data=go.Heatmap(
    z=heatmap_data.values,
    x=heatmap_data.columns,
    y=heatmap_data.index,
    colorscale='RdYlGn',
    colorbar=dict(title='Sentimiento promedio')
))

fig.update_layout(
    title='Sentimiento promedio por participante por tema (TROUT)',
    xaxis_title='Tema',
    yaxis_title='Participante'
)

fig.show()


# Análisis exploratorio FILAH

In [44]:
import json
import pandas as pd
from collections import defaultdict

# 1. Load the JSON file
filah_path = "/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/VI - VAST Challenge 2/FILAH.json"
with open(filah_path, "r", encoding="utf-8") as f:
    filah_data = json.load(f)

# 2. Extract nodes and links
nodes_filah = {node['id']: node for node in filah_data.get('nodes', [])}
links_filah = filah_data.get('links', [])

# 3. Flatten sentiment records
sentiment_records_filah = []
for link in links_filah:
    if link.get('role') == 'participant' and 'sentiment' in link:
        sentiment = link['sentiment']
        source = link['source']
        target = link['target']
        industry_list = link.get('industry')
        industry = industry_list[0] if isinstance(industry_list, list) and industry_list else 'unknown'
        reason = link.get('reason', '')
        sentiment_records_filah.append({
            "person": target,
            "proposal": source,
            "sentiment": sentiment,
            "industry": industry,
            "reason": reason
        })

df_sentiment_filah = pd.DataFrame(sentiment_records_filah)
df_sentiment_filah

Unnamed: 0,person,proposal,sentiment,industry,reason
0,Tours Central Ticketing,expanding_tourist_wharf_Meeting_7_Initial_View...,1.00,tourism,More tourism so more tickets sold.
1,Paackland Container Inc.,expanding_tourist_wharf_Meeting_7_Initial_View...,-0.50,tourism,Tourism is a competitor for the same resources.
2,Seal,expanding_tourist_wharf_Meeting_7_Initial_View...,0.10,tourism,"Seems like headache, but if other people are w..."
3,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,0.50,tourism,Supports tourism growth
4,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,0.50,tourism,Supports tourism growth
...,...,...,...,...,...
113,Simone Kat,waterfront_market_Meeting_7_Discussion,0.75,tourism,Supports as it aligns well with tourism and lo...
114,Carol Limpet,waterfront_market_Meeting_7_Discussion,1.00,tourism,Strongly supports as it aligns with community ...
115,Carol Limpet,waterfront_market_Travel_Harbor_Edge_Grill_Dis...,1.00,tourism,Strongly supports as it aligns with community ...
116,Carol Limpet,waterfront_market_Travel_Harbor_Edge_Grill,1.00,tourism,Strongly supports as it aligns with community ...


In [45]:

from collections import defaultdict

# Extract unique proposals from the FILAH sentiment data
proposals_filah = df_sentiment_filah["proposal"].unique()

# Group the proposals by theme
grouped_proposals_filah = defaultdict(list)
proposal_to_group_filah = {}

for name in proposals_filah:
    name_lower = name.lower()
    if any(k in name_lower for k in ["tour", "event", "concert", "cultural", "culture", "tourist", "tourism", "trip", "travel", "walking", "festival", "statue", "park"]):
        group = 'Turismo'
    elif any(k in name_lower for k in ["fish", "fishing", "market", "dock", "crane", "ship", "harbor", "vacuum", "lomark", "himark"]):
        group = 'Pesca'
    else:
        group = 'Otro'
    grouped_proposals_filah[group].append(name)
    proposal_to_group_filah[name] = group

In [46]:
group_counts_filah = [{'Grupo': k, 'Cantidad de reuniones': len(v)} for k, v in grouped_proposals_filah.items()]
df_grouped_filah = pd.DataFrame(group_counts_filah)

fig = px.bar(
    df_grouped_filah,
    x='Grupo',
    y='Cantidad de reuniones',
    title='Cantidad de reuniones por tema (FILAH)',
    text='Cantidad de reuniones'
)
fig.show()

In [47]:
# 2. Extraer la lista de propuestas (campo 'source' de los enlaces participant)
proposals_trout = {link["source"] for link in trout_data["links"] if link.get("role") == "participant"}
proposals_filah = {link["source"] for link in filah_data["links"] if link.get("role") == "participant"}

# 3. Función para agrupar por tema
def group_by_theme(names):
    grouped = defaultdict(list)
    for name in names:
        nl = name.lower()
        if any(k in nl for k in ["tour","event","concert","cultural","culture","tourist","tourism","trip","travel","walking","festival","statue","park"]):
            grouped["Turismo"].append(name)
        elif any(k in nl for k in ["fish","fishing","market","dock","crane","ship","harbor","vacuum","lomark","himark"]):
            grouped["Pesca"].append(name)
        else:
            grouped["Otro"].append(name)
    return grouped

grouped_trout = group_by_theme(proposals_trout)
grouped_filah = group_by_theme(proposals_filah)

# 4. Crear DataFrames de conteos
df_trout = pd.DataFrame([
    {"Tema": tema, "TROUT": len(names)}
    for tema, names in grouped_trout.items()
])
df_filah = pd.DataFrame([
    {"Tema": tema, "FILAH": len(names)}
    for tema, names in grouped_filah.items()
])

# 5. Unir y preparar para plotting
df = pd.merge(df_trout, df_filah, on="Tema", how="outer").fillna(0)
df[["TROUT","FILAH"]] = df[["TROUT","FILAH"]].astype(int)
df_melt = df.melt(
    id_vars="Tema",
    value_vars=["TROUT","FILAH"],
    var_name="Fuente",
    value_name="Cantidad de reuniones"
)

# 6. Gráfico combinado
fig = px.bar(
    df_melt,
    x="Tema",
    y="Cantidad de reuniones",
    color="Fuente",
    barmode="group",
    title="Cantidad de reuniones por tema: TROUT vs FILAH",
    labels={
        "Tema": "Tema",
        "Cantidad de reuniones": "Cantidad de reuniones",
        "Fuente": "Conjunto de datos"
    },
    color_discrete_map={"TROUT":"goldenrod","FILAH":"royalblue"}
)
fig.update_layout(
    xaxis=dict(categoryorder="array", categoryarray=["Turismo","Pesca","Otro"])
)
fig.show()

In [48]:

# 4. Group sentiment by industry
industry_summary_filah = df_sentiment_filah.groupby("industry").agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()
industry_summary_filah

Unnamed: 0,industry,average_sentiment,sentiment_count
0,large vessel,-0.066667,27
1,small vessel,0.6,15
2,tourism,0.693,50
3,unknown,0.288235,17


In [49]:
# 5. Classify proposals into groups
proposals_filah = df_sentiment_filah["proposal"].unique()
grouped_proposals_filah = defaultdict(list)
proposal_to_group_filah = {}

for name in proposals_filah:
    name_lower = name.lower()
    if any(k in name_lower for k in ["tour", "event", "concert", "cultural", "culture", "tourist", "tourism", "trip", "travel", "walking", "festival", "statue", "park"]):
        group = 'Turismo'
    elif any(k in name_lower for k in ["fish", "fishing", "market", "dock", "crane", "ship", "harbor", "vacuum", "lomark", "himark"]):
        group = 'Pesca'
    # else:
    #     group = 'Other'
    grouped_proposals_filah[group].append(name)
    proposal_to_group_filah[name] = group

df_sentiment_filah["proposal_group"] = df_sentiment_filah["proposal"].map(proposal_to_group_filah).fillna("Other")
df_sentiment_filah

Unnamed: 0,person,proposal,sentiment,industry,reason,proposal_group
0,Tours Central Ticketing,expanding_tourist_wharf_Meeting_7_Initial_View...,1.00,tourism,More tourism so more tickets sold.,Turismo
1,Paackland Container Inc.,expanding_tourist_wharf_Meeting_7_Initial_View...,-0.50,tourism,Tourism is a competitor for the same resources.,Turismo
2,Seal,expanding_tourist_wharf_Meeting_7_Initial_View...,0.10,tourism,"Seems like headache, but if other people are w...",Turismo
3,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,0.50,tourism,Supports tourism growth,Turismo
4,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,0.50,tourism,Supports tourism growth,Turismo
...,...,...,...,...,...,...
113,Simone Kat,waterfront_market_Meeting_7_Discussion,0.75,tourism,Supports as it aligns well with tourism and lo...,Pesca
114,Carol Limpet,waterfront_market_Meeting_7_Discussion,1.00,tourism,Strongly supports as it aligns with community ...,Pesca
115,Carol Limpet,waterfront_market_Travel_Harbor_Edge_Grill_Dis...,1.00,tourism,Strongly supports as it aligns with community ...,Turismo
116,Carol Limpet,waterfront_market_Travel_Harbor_Edge_Grill,1.00,tourism,Strongly supports as it aligns with community ...,Turismo


In [50]:

# 6. Sentiment summary by proposal group
group_summary_filah = df_sentiment_filah.groupby("proposal_group").agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()
group_summary_filah

Unnamed: 0,proposal_group,average_sentiment,sentiment_count
0,Pesca,0.1975,20
1,Turismo,0.480899,89


In [51]:

# 7. Sentiment per person by theme
df_sentiment_filah["theme"] = df_sentiment_filah["proposal_group"]
person_theme_sentiment_filah = df_sentiment_filah.groupby(["person", "theme"]).agg(
    average_sentiment=("sentiment", "mean"),
    sentiment_count=("sentiment", "count")
).reset_index()

In [52]:
import plotly.graph_objects as go

# Convert to pivot table for heatmap
heatmap_data_filah = person_theme_sentiment_filah.pivot(index='person', columns='theme', values='average_sentiment').fillna(0)
heatmap_data_filah = heatmap_data_filah.astype(float)

# Plot heatmap
fig = go.Figure(data=go.Heatmap(
    z=heatmap_data_filah.values,
    x=heatmap_data_filah.columns,
    y=heatmap_data_filah.index,
    colorscale='RdYlGn',
    colorbar=dict(title='Sentimiento promedio')
))

fig.update_layout(
    title='Sentimiento promedio por participante por tema (FILAH)',
    xaxis_title='Tema',
    yaxis_title='Participante'
)

fig.show()


# Grafo interactivo

In [53]:
!pip install ipycytoscape



In [54]:
import json
import networkx as nx

# Extract nodes and links
trout_links = trout_data.get("links", [])
trout_nodes = {node['id']: node for node in trout_data.get("nodes", [])}

# Rebuild the graph with a safer industry extraction
G_trout = nx.DiGraph()

for link in trout_links:
    if link.get('role') == 'participant' and 'sentiment' in link:
        sentiment = link['sentiment']
        source = link['source']
        target = link['target']
        reason = link.get('reason', '')

        industry_list = link.get('industry')
        industry = industry_list[0] if isinstance(industry_list, list) and industry_list else 'unknown'

        G_trout.add_node(source, label=source, type='proposal')
        G_trout.add_node(target, label=target, type='person')
        G_trout.add_edge(source, target, sentiment=sentiment, reason=reason, industry=industry)

# Confirm graph construction
len(G_trout.nodes), len(G_trout.edges)


(82, 94)

In [55]:
from ipycytoscape import CytoscapeWidget

cyto = CytoscapeWidget()
cyto.graph.add_graph_from_networkx(G_trout, directed=True)

cyto.set_style([
    {
        'selector': 'node',
        'style': {
            'label': 'data(label)',
            'background-color': 'mapData(type, "proposal", "lightblue", "person", "lightgreen")',
            'border-width': '2px',
            'border-color': '#555'
        }
    },
    {
        'selector': 'edge',
        'style': {
            'curve-style': 'bezier',
            'target-arrow-shape': 'triangle',
            'line-color': 'mapData(sentiment, -1, 1, red, green)',
            'width': 2,
            'label': 'data(industry)'
        }
    }
])

cyto.set_layout(name='cose')
cyto


CytoscapeWidget(cytoscape_layout={'name': 'cose'}, cytoscape_style=[{'selector': 'node', 'style': {'label': 'd…

# Grafo para D3

In [56]:
# import json

# # Create node list
# nodes = []
# node_ids = set()
# for node_id, data in G_trout.nodes(data=True):
#     if node_id not in node_ids:
#         nodes.append({
#             "id": node_id,
#             "label": data.get("label", node_id),
#             "type": data.get("type", "unknown")
#         })
#         node_ids.add(node_id)

# # Create link list
# links = []
# for u, v, data in G_trout.edges(data=True):
#     links.append({
#         "source": u,
#         "target": v,
#         "sentiment": data.get("sentiment", 0),
#         "industry": data.get("industry", ""),
#         "reason": data.get("reason", "")
#     })

# # Export to file
# with open("trout_graph_d3.json", "w") as f:
#     json.dump({"nodes": nodes, "links": links}, f, indent=2)

# # Download link in Colab
# from google.colab import files
# files.download("trout_graph_d3.json")


#TAREA 1

In [57]:
def safe_extract_bias_links(data, bias_group_keywords):
    result = []
    for link in data.get("links", []):
        if link.get("role") == "participant" and isinstance(link.get("sentiment"), (int, float)):
            for keyword in bias_group_keywords:
                if keyword.lower() in link.get("source", "").lower():
                    result.append(link)
    return result

def summarize_sentiments(links):
    pos = sum(1 for l in links if l.get("sentiment", 0) > 0.2)
    neg = sum(1 for l in links if l.get("sentiment", 0) < -0.2)
    neu = len(links) - pos - neg
    return {"Positivo": pos, "Neutro": neu, "Negativo": neg}

trout_keywords = ["tour", "event", "concert", "cultural", "culture", "tourist", "tourism",  "travel", "walking", "festival", "statue", "park"]
filah_keywords = ["fish", "fishing", "market", "dock", "crane", "ship", "harbor", "vacuum", "lomark", "himark"]

# Re-run extraction with updated filter
trout_on_filah = safe_extract_bias_links(trout_data, filah_keywords)
trout_on_trout = safe_extract_bias_links(trout_data, trout_keywords)
filah_on_trout = safe_extract_bias_links(filah_data, trout_keywords)
filah_on_filah = safe_extract_bias_links(filah_data, filah_keywords)

# Rebuild summary table
summary = pd.DataFrame({
    "TROUT sobre turismo": summarize_sentiments(trout_on_filah),
    "FILAH sobre turismo": summarize_sentiments(filah_on_filah),
    "TROUT sobre pesca": summarize_sentiments(trout_on_trout),
    "FILAH sobre pesca": summarize_sentiments(filah_on_trout),
}).T

# Plot con Plotly
categories = summary.index.tolist()
sentiments = summary.columns.tolist()
colors = {"Positivo": "green", "Neutro": "gray", "Negativo": "red"}

traces = []
for sentiment in sentiments:
    traces.append(go.Bar(
        name=sentiment.capitalize(),
        x=categories,
        y=summary[sentiment],
        marker_color=colors[sentiment]
    ))

fig = go.Figure(data=traces)
fig.update_layout(
    barmode='stack',
    title='Sentimientos expresados hacia propuestas de TROUT y FILAH',
    xaxis_title='Origen de los datos',
    yaxis_title='Cantidad de participaciones',
    legend_title='Sentimiento',
    template='plotly_white'
)

fig.show()

In [58]:
def extract_links_with_industry(data):
    links = []
    for link in data.get("links", []):
        if link.get("role") == "participant" and isinstance(link.get("sentiment"), (int, float)):
            industries = link.get("industry", [])
            if isinstance(industries, list) and industries:
                for industry in industries:
                    links.append({
                        "source": link.get("source"),
                        "target": link.get("target"),
                        "sentiment": link.get("sentiment"),
                        "industry": industry.lower().strip()
                    })
    return links

# Crear DataFrames
df_trout = pd.DataFrame(extract_links_with_industry(trout_data))
df_filah = pd.DataFrame(extract_links_with_industry(filah_data))

# Función para agrupar sentimientos por industria
def build_sentiment_matrix(df, label):
    grouped = df.groupby("industry").agg(
        positive=("sentiment", lambda x: (x > 0.2).sum()),
        neutral=("sentiment", lambda x: ((x <= 0.2) & (x >= -0.2)).sum()),
        negative=("sentiment", lambda x: (x < -0.2).sum())
    )
    grouped["source"] = label
    return grouped.reset_index()

# Crear matrices
summary_trout = build_sentiment_matrix(df_trout, "TROUT")
summary_filah = build_sentiment_matrix(df_filah, "FILAH")

# Unir y reorganizar
summary_combined = pd.concat([summary_trout, summary_filah])
summary_melted = summary_combined.melt(id_vars=["industry", "source"],
                                       value_vars=["positive", "neutral", "negative"],
                                       var_name="sentiment", value_name="count")
# Traducir los valores de sentimiento
sentiment_translation = {
    "positive": "Positivo",
    "neutral": "Neutro",
    "negative": "Negativo"
}
summary_melted["sentiment_es"] = summary_melted["sentiment"].map(sentiment_translation)

# Definir colores personalizados para las etiquetas en español
custom_colors_es = {
    ("Positivo", "TROUT"): "green",
    ("Positivo", "FILAH"): "lightgreen",
    ("Neutro", "TROUT"): "gold",
    ("Neutro", "FILAH"): "moccasin",
    ("Negativo", "TROUT"): "red",
    ("Negativo", "FILAH"): "indianred"
}

# Crear gráfico
import plotly.graph_objects as go

fig = go.Figure()

for sentiment_es in ["Positivo", "Neutro", "Negativo"]:
    for source in ["TROUT", "FILAH"]:
        subset = summary_melted[
            (summary_melted["sentiment_es"] == sentiment_es) &
            (summary_melted["source"] == source)
        ]
        fig.add_trace(go.Bar(
            name=f"{sentiment_es} - {source}",
            x=subset["industry"],
            y=subset["count"],
            marker_color=custom_colors_es[(sentiment_es, source)]
        ))

fig.update_layout(
    barmode="stack",
    title="Sentimientos por industria según origen de los datos (TROUT vs FILAH)",
    xaxis_title="Industria",
    yaxis_title="Cantidad de participaciones",
    legend_title="Sentimiento (Fuente)",
    template="plotly_white"
)

fig.show()


In [59]:
def extract_links_with_industry(data):
    links = []
    for link in data.get("links", []):
        if link.get("role") == "participant" and isinstance(link.get("sentiment"), (int, float)):
            industries = link.get("industry", [])
            if isinstance(industries, list) and industries:
                for industry in industries:
                    links.append({
                        "source": link.get("source"),
                        "target": link.get("target"),
                        "sentiment": link.get("sentiment"),
                        "industry": industry.lower().strip()
                    })
    return links

df_trout = pd.DataFrame(extract_links_with_industry(trout_data))
df_filah = pd.DataFrame(extract_links_with_industry(filah_data))

# Palabras clave por categoría
tourism_keywords = ["tour", "trip", "tourist", "festival", "statue", "marine", "deck", "route", "waterfront", "park"]
fishing_keywords = ["crane", "fishing", "dock", "fish", "harbor", "market", "seafood", "vacuum"]

# Clasificar por temática
def categorize_by_keywords_and_industry(source, industries):
    source_lower = source.lower()
    industry_lower = [i.lower() for i in industries]

    if "tourism" in industry_lower or any(k in source_lower for k in tourism_keywords):
        if not any(k in source_lower for k in fishing_keywords):
            return "Turismo"
    if any(k in source_lower for k in fishing_keywords):
        return "Pesca"
    return "Otra"

# Aplicar categorización
def classify_theme_keywords_only(df):
    return df.assign(theme=df.apply(lambda row: categorize_by_keywords_and_industry(row["source"], [row["industry"]]), axis=1))

df_trout_theme_kw = classify_theme_keywords_only(df_trout)
df_filah_theme_kw = classify_theme_keywords_only(df_filah)

# Construir resumen
def build_matrix_by_theme(df, label):
    grouped = df.groupby("theme").agg(
        positive=("sentiment", lambda x: (x > 0.2).sum()),
        neutral=("sentiment", lambda x: ((x <= 0.2) & (x >= -0.2)).sum()),
        negative=("sentiment", lambda x: (x < -0.2).sum())
    )
    grouped["source"] = label
    return grouped.reset_index()

summary_trout_kw = build_matrix_by_theme(df_trout_theme_kw, "TROUT")
summary_filah_kw = build_matrix_by_theme(df_filah_theme_kw, "FILAH")

summary_kw_combined = pd.concat([summary_trout_kw, summary_filah_kw])
summary_kw_melted = summary_kw_combined.melt(
    id_vars=["theme", "source"],
    value_vars=["positive", "neutral", "negative"],
    var_name="sentiment",
    value_name="count"
)

# Traducción
summary_kw_melted["sentiment_es"] = summary_kw_melted["sentiment"].map({
    "positive": "Positivo",
    "neutral": "Neutro",
    "negative": "Negativo"
})

# Colores personalizados
custom_colors_es = {
    ("Positivo", "TROUT"): "green",
    ("Positivo", "FILAH"): "lightgreen",
    ("Neutro", "TROUT"): "gold",
    ("Neutro", "FILAH"): "moccasin",
    ("Negativo", "TROUT"): "red",
    ("Negativo", "FILAH"): "indianred"
}

# Graficar
fig = go.Figure()

for sentiment_es in ["Positivo", "Neutro", "Negativo"]:
    for source in ["TROUT", "FILAH"]:
        subset = summary_kw_melted[
            (summary_kw_melted["sentiment_es"] == sentiment_es) &
            (summary_kw_melted["source"] == source)
        ]
        fig.add_trace(go.Bar(
            name=f"{sentiment_es} - {source}",
            x=subset["theme"],
            y=subset["count"],
            marker_color=custom_colors_es[(sentiment_es, source)]
        ))

fig.update_layout(
    barmode="stack",
    title="Sentimientos por categoría y conjunto de datos",
    xaxis_title="Tema",
    yaxis_title="Cantidad de participaciones",
    legend_title="Sentimiento (Fuente)",
    template="plotly_white"
)

fig.show()

In [60]:
# Filtrar solo Pesca y Turismo
valid_themes = ["Turismo", "Pesca"]
filtered_df = summary_kw_melted[summary_kw_melted["theme"].isin(valid_themes)].copy()

# Nueva columna combinando fuente y categoría
filtered_df["grupo"] = filtered_df["source"] + " sobre " + filtered_df["theme"]

# Orden personalizado de columnas
group_order = ["TROUT sobre pesca", "FILAH sobre pesca", "TROUT sobre turismo", "FILAH sobre turismo"]

# Colores por sentimiento y fuente
custom_colors_es = {
    ("Positivo", "TROUT"): "lightgreen",
    ("Neutro", "TROUT"): "moccasin",
    ("Negativo", "TROUT"): "indianred",
    ("Positivo", "FILAH"): "lightgreen",
    ("Neutro", "FILAH"): "moccasin",
    ("Negativo", "FILAH"): "indianred"
}

import plotly.graph_objects as go

fig = go.Figure()

for sentiment in ["Positivo", "Neutro", "Negativo"]:
    for source in ["TROUT", "FILAH"]:
        for theme in ["Pesca", "Turismo"]:
            group_label = f"{source} sobre {theme}"
            subset = filtered_df[
                (filtered_df["grupo"] == group_label) &
                (filtered_df["sentiment_es"] == sentiment)
            ]
            color = custom_colors_es[(sentiment, source)]
            count = subset["count"].values[0] if not subset.empty else 0
            fig.add_trace(go.Bar(
                name=f"{sentiment} - {source}",
                x=[group_label],
                y=[count],
                marker_color=color,
                showlegend=False  # solo mostrar una vez
            ))

legend_labels = []
for sentiment in ["Positivo", "Neutro", "Negativo"]:
    label = sentiment
    color = custom_colors_es[(sentiment, "FILAH")]
    if label not in legend_labels:
        fig.add_trace(go.Bar(
            x=[None],
            y=[None],
            name=label,
            marker_color=color,
            showlegend=True
        ))
        legend_labels.append(label)

fig.update_layout(
    barmode="stack",
    title="Sentimientos hacia propuestas de pesca y turismo (TROUT vs FILAH)",
    xaxis_title="Grupo de análisis",
    yaxis_title="Cantidad de participaciones",
    xaxis=dict(categoryorder="array", categoryarray=group_order),
    legend_title="Sentimiento",
    template="plotly_white"
)

fig.show()


## Análisis por persona

In [61]:
person_ids = {
    node["id"] for node in trout_data.get("nodes", [])
    if node.get("type") == "entity.person"
}

# Extraer participaciones válidas (rol participant, target = persona, con sentimiento válido)
participant_links = [
    {
        "person": link.get("target"),
        "meeting": link.get("source"),
        "sentiment": link.get("sentiment"),
        "industry": link.get("industry", [])
    }
    for link in trout_data.get("links", [])
    if link.get("role") == "participant"
    and isinstance(link.get("sentiment"), (int, float))
    and link.get("target") in person_ids
]

df_person_participants = pd.DataFrame(participant_links)

In [62]:
# Separar múltiples industrias en filas individuales
df_exploded = df_person_participants.explode("industry").dropna(subset=["industry"])
df_exploded["industry"] = df_exploded["industry"].str.strip().str.lower()

# Agrupar
df_sentiment_by_industry = df_exploded.groupby(["person", "industry"]).agg(
    average_sentiment=("sentiment", "mean"),
    participations=("sentiment", "count")
).reset_index()


In [63]:
# # Categorías a partir de palabras clave
# tourism_keywords = ["tour", "trip", "tourist", "festival", "statue", "marine", "deck", "route", "waterfront", "park"]
# fishing_keywords = ["crane", "fishing", "dock", "fish", "harbor", "market", "seafood", "vacuum"]

def categorize_theme(meeting_name):
    name = meeting_name.lower()
    if any(k in name for k in tourism_keywords):
        return "Turismo"
    elif any(k in name for k in fishing_keywords):
        return "Pesca"
    else:
        return "Otra"

# Aplicar a las reuniones
df_person_participants["theme"] = df_person_participants["meeting"].apply(categorize_theme)

# Agrupar por persona y tema
df_sentiment_by_theme = df_person_participants.groupby(["person", "theme"]).agg(
    average_sentiment=("sentiment", "mean"),
    participations=("sentiment", "count")
).reset_index()


In [64]:
import plotly.express as px

fig = px.bar(
    df_sentiment_by_industry,
    x="person",
    y="average_sentiment",
    color="industry",
    barmode="group",
    title="Sentimiento promedio por persona por industria (explícita)",
    labels={"person": "Participante", "average_sentiment": "Sentimiento promedio", "industry": "Industria"},
    color_discrete_sequence=px.colors.qualitative.Set2
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


In [65]:
fig2 = px.bar(
    df_sentiment_by_theme,
    x="person",
    y="average_sentiment",
    color="theme",
    barmode="group",
    title="Sentimiento promedio por persona por industria (por keywords)",
    labels={"person": "Participante", "average_sentiment": "Sentimiento promedio", "theme": "Industria"},
    color_discrete_map={
        "Turismo": "goldenrod",
        "Pesca": "royalblue",
        "Otra": "lightgray"
    }
)
fig2.update_layout(xaxis_tickangle=-45)
fig2.show()


In [66]:
# # Separar múltiples industrias en filas individuales
# df_exploded = df_person_participants.explode("industry").dropna(subset=["industry"])
# df_exploded["industry"] = df_exploded["industry"].str.strip().str.lower()

# Agrupar para obtener sentimiento total
df_total_sentiment_by_industry = df_exploded.groupby(["person", "industry"]).agg(
    total_sentiment=("sentiment", "sum"),
    participations=("sentiment", "count")
).reset_index()


In [67]:
# Reutilizamos df_person_participants con columna 'theme' ya calculada

# Agrupar por persona y tema sumando los valores de sentimiento
df_total_sentiment_by_theme = df_person_participants.groupby(["person", "theme"]).agg(
    total_sentiment=("sentiment", "sum"),
    participations=("sentiment", "count")
).reset_index()


In [68]:
def categorize_theme_with_override(meeting_name):
    name = meeting_name.lower()
    if "large_vessel" in name:
        return "Pesca"
    elif "tourism" in name:
        return "Turismo"
    elif "small_vessel" in name:
        return "Pesca"
    elif any(k in name for k in tourism_keywords):
        return "Turismo"
    elif any(k in name for k in fishing_keywords):
        return "Pesca"
    else:
        return "Otra"

# Aplicar la nueva función
df_person_participants["theme"] = df_person_participants["meeting"].apply(categorize_theme_with_override)

# Volver a agrupar con la nueva clasificación
df_total_sentiment_by_theme = df_person_participants.groupby(["person", "theme"]).agg(
    total_sentiment=("sentiment", "sum"),
    participations=("sentiment", "count")
).reset_index()

In [69]:
orden_personas = ["Seal"] + sorted(df_total_sentiment_by_industry["person"].unique())
orden_personas = list(dict.fromkeys(orden_personas))  # elimina duplicados si los hay


In [70]:
fig = px.bar(
    df_total_sentiment_by_industry,
    x="person",
    y="total_sentiment",
    color="industry",
    barmode="group",
    title="Sentimiento agregado por persona por tema (TROUT)",
    labels={
        "person": "Participante",
        "total_sentiment": "Sentimiento agregado",
        "industry": "Tema",
        "large vessel": "gran navío",
        "small vessel": "embarcación pequeña",
        "tourism": "turismo"
    },
    color_discrete_map={
        "large vessel": "navy",
        "small vessel": "royalblue",
        "tourism": "sandybrown"
    },
    category_orders={"person": orden_personas}
)

fig.update_layout(
    xaxis_tickangle=-45,
    yaxis=dict(range=[-5, 20])
)

fig.for_each_trace(
    lambda t: t.update(
        name=t.name.replace("large vessel", "gran navío")
                    .replace("small vessel", "embarcación pequeña")
                    .replace("tourism", "turismo")
    )
)

fig.show()


In [71]:
fig2 = px.bar(
    df_total_sentiment_by_theme,
    x="person",
    y="total_sentiment",
    color="theme",
    barmode="group",
    title="Sentimiento total por persona por industria (por keywords)",
    labels={"person": "Participante", "total_sentiment": "Sentimiento total", "theme": "Industria"},
    color_discrete_map={
        "Turismo": "goldenrod",
        "Pesca": "royalblue",
        "Otra": "silver"
    }
)
fig2.update_layout(xaxis_tickangle=-45)
fig2.show()


In [72]:
filah_person_entities = set(p["name"] for p in filah_data.get("nodes", []) if p.get("type") == "entity.person")
df_filah = pd.DataFrame(extract_links_with_industry(filah_data))
df_filah_person_participants = df_filah[df_filah["target"].isin(filah_person_entities)].copy()


In [73]:
df_filah_exploded = df_filah_person_participants.explode("industry").dropna(subset=["industry"])
df_filah_exploded["industry"] = df_filah_exploded["industry"].str.strip().str.lower()

df_total_sentiment_by_industry_filah = df_filah_exploded.groupby(["target", "industry"]).agg(
    total_sentiment=("sentiment", "sum"),
    participations=("sentiment", "count")
).reset_index()

df_total_sentiment_by_industry_filah.rename(columns={"target": "person"}, inplace=True)


In [74]:
fig = px.bar(
    df_total_sentiment_by_industry_filah.replace({
        "large vessel": "gran navío",
        "small vessel": "embarcación pequeña",
        "tourism": "turismo"
    }),
    x="person",
    y="total_sentiment",
    color="industry",
    category_orders={"industry": ["gran navío", "embarcación pequeña", "turismo"]},
    barmode="group",
    title="Sentimiento agregado por persona por tema (FILAH)",
    labels={
        "person": "Participante",
        "total_sentiment": "Sentimiento agregado",
        "industry": "Tema"
    },
    color_discrete_map={
        "gran navío": "navy",
        "embarcación pequeña": "royalblue",
        "turismo": "sandybrown"
    }
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()



# TAREA 2

In [75]:
# Load journalist.json file
json_path = "/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/VI - VAST Challenge 2/journalist.json"

# Cargar el JSON
with open(json_path, "r", encoding="utf-8") as f:
    journalist_data = json.load(f)


In [76]:

# Extraer los IDs de los eventos de tipo "trip"
trip_ids = {node["id"] for node in journalist_data["nodes"] if node.get("type") == "trip"}

# Buscar entidades asociadas a esos viajes mediante links
trip_entity_links = []
for link in journalist_data.get("links", []):
    if link.get("source") in trip_ids:
        trip_entity_links.append({
            "trip_id": link.get("source"),
            "entity_id": link.get("target"),
            "role": link.get("role")
        })

# Asociar nombres y tipos de las entidades
entity_info = {
    node["id"]: {"name": node.get("name", node["id"]), "type": node.get("type")}
    for node in journalist_data["nodes"]
}

# Agregar nombre y tipo de entidad a cada registro
for record in trip_entity_links:
    entity_data = entity_info.get(record["entity_id"], {})
    record["entity_name"] = entity_data.get("name")
    record["entity_type"] = entity_data.get("type")

# Crear el DataFrame
df_trip_entities = pd.DataFrame(trip_entity_links)

# Mostrar la tabla
df_trip_entities

Unnamed: 0,trip_id,entity_id,role,entity_name,entity_type
0,trip_0,Simone Kat,,Simone Kat,entity.person
1,trip_0,Haacklee Ferry Terminal,,Haacklee Ferry Terminal,place
2,trip_0,South Paackland Ferry Terminal,,South Paackland Ferry Terminal,place
3,trip_0,582184557,,Pacific Nature Bureau,place
4,trip_0,581853838,,Jordan Administrative Center,place
...,...,...,...,...,...
1700,trip_341,Haacklee Ferry Terminal,,Haacklee Ferry Terminal,place
1701,trip_341,Port Grove Ferry Terminal,,Port Grove Ferry Terminal,place
1702,trip_341,36988183,,Port Grove Customs House,place
1703,trip_341,Suna Spit,,Suna Spit,place


In [77]:
from datetime import datetime

# Crear DataFrame con trips
df_trips = pd.DataFrame([
    {
        "trip_id": node["id"],
        "start": node.get("start"),
        "end": node.get("end")
    }
    for node in journalist_data["nodes"]
    if node.get("type") == "trip"
])

# Calcular duración en minutos
def compute_duration(row):
    try:
        start_time = datetime.strptime(row["start"], "%H:%M:%S")
        end_time = datetime.strptime(row["end"], "%H:%M:%S")
        duration = (end_time - start_time).total_seconds() / 60
        if duration < 0:  # Si la hora final es menor (pasó medianoche), ajustamos
            duration += 24 * 60
        return duration
    except:
        return None

df_trips["duration"] = df_trips.apply(compute_duration, axis=1)
df_trips

Unnamed: 0,trip_id,start,end,duration
0,trip_0,09:00:00,21:00:00,720.0
1,trip_1,06:29:00,12:36:00,367.0
2,trip_2,08:23:00,08:23:00,0.0
3,trip_3,07:28:00,07:59:00,31.0
4,trip_4,07:50:00,14:42:00,412.0
...,...,...,...,...
337,trip_337,08:40:00,12:58:00,258.0
338,trip_338,09:00:00,21:52:00,772.0
339,trip_339,07:47:00,11:10:00,203.0
340,trip_340,07:37:00,08:58:00,81.0


In [78]:
# Append duration to df_trip_entities
df_trip_entities = df_trip_entities.merge(df_trips[["trip_id", "duration"]], on="trip_id", how="left")
df_trip_entities

Unnamed: 0,trip_id,entity_id,role,entity_name,entity_type,duration
0,trip_0,Simone Kat,,Simone Kat,entity.person,720.0
1,trip_0,Haacklee Ferry Terminal,,Haacklee Ferry Terminal,place,720.0
2,trip_0,South Paackland Ferry Terminal,,South Paackland Ferry Terminal,place,720.0
3,trip_0,582184557,,Pacific Nature Bureau,place,720.0
4,trip_0,581853838,,Jordan Administrative Center,place,720.0
...,...,...,...,...,...,...
1700,trip_341,Haacklee Ferry Terminal,,Haacklee Ferry Terminal,place,720.0
1701,trip_341,Port Grove Ferry Terminal,,Port Grove Ferry Terminal,place,720.0
1702,trip_341,36988183,,Port Grove Customs House,place,720.0
1703,trip_341,Suna Spit,,Suna Spit,place,720.0


In [79]:
# Filtrar solo entidades que sean personas
df_trip_entities_persons = df_trip_entities[df_trip_entities["entity_type"] == "entity.person"]

# Mostrar la tabla resultante
df_trip_entities_persons

Unnamed: 0,trip_id,entity_id,role,entity_name,entity_type,duration
0,trip_0,Simone Kat,,Simone Kat,entity.person,720.0
5,trip_1,Seal,,Seal,entity.person,367.0
9,trip_2,Ed Helpsford,,Ed Helpsford,entity.person,0.0
11,trip_3,Teddy Goldstein,,Teddy Goldstein,entity.person,31.0
16,trip_4,Seal,,Seal,entity.person,412.0
...,...,...,...,...,...,...
1678,trip_337,Seal,,Seal,entity.person,258.0
1681,trip_338,Simone Kat,,Simone Kat,entity.person,772.0
1689,trip_339,Tante Titan,,Tante Titan,entity.person,203.0
1693,trip_340,Teddy Goldstein,,Teddy Goldstein,entity.person,81.0


In [82]:
# load "Eventos_con_industria_actualizada.csv"
df_meeting_time = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/Eventos_con_industria_actualizada.csv")
df_meeting_time

Unnamed: 0,person_id,person_name,event_id,event_type,event_date,event_name_raw,event_name,meeting_number,meeting_type,industry
0,Seal,Seal,expanding_tourist_wharf_Meeting_7_Initial_View...,discussion,,,expanding_tourist_wharf,7.0,initial_views_discussion,tourism
1,Simone Kat,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,discussion,,,expanding_tourist_wharf,,,tourism
2,Simone Kat,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,plan,,,expanding_tourist_wharf,,,tourism
3,Seal,Seal,expanding_tourist_wharf_Meeting_8_Expand_Ideas...,discussion,,,expanding_tourist_wharf,8.0,expand_ideas_discussion,tourism
4,Simone Kat,Simone Kat,expanding_tourist_wharf_Travel_Harbor_Route_So...,discussion,,,expanding_tourist_wharf,,,tourism
...,...,...,...,...,...,...,...,...,...,...
169,Tante Titan,Tante Titan,concert_Meeting_16_Feedback,plan,,,concert,16.0,feedback,tourism
170,Ed Helpsford,Ed Helpsford,concert_Meeting_16_Road_Closures_Discussion,discussion,,,concert,16.0,road_closures_discussion,tourism
171,Ed Helpsford,Ed Helpsford,concert_Meeting_16_Road_Closures,plan,,,concert,16.0,road_closures,tourism
172,Tante Titan,Tante Titan,concert_Meeting_16_Communication_Complaints_Di...,discussion,,,concert,16.0,communication_complaints_discussion,tourism


In [83]:
# Agrupar por persona y nombre de evento, y contar cuántas veces asistió a cada uno
df_attendance_by_event = df_meeting_time.groupby(
    ["person_id", "event_name", "industry"]
).size().reset_index(name="attended")

df_attendance_by_event


Unnamed: 0,person_id,event_name,industry,attended
0,Carol Limpet,marine_life_deck,tourism,7
1,Carol Limpet,renaming_park_himark,tourism,5
2,Carol Limpet,seafood_festival,tourism,5
3,Carol Limpet,waterfront_market,tourism,4
4,Ed Helpsford,affordable_housing,other,7
5,Ed Helpsford,concert,tourism,4
6,Ed Helpsford,low_volume_crane,fishing,2
7,Ed Helpsford,name_harbor_area,fishing,2
8,Ed Helpsford,name_inspection_office,other,2
9,Ed Helpsford,waterfront_market,tourism,3


In [84]:
# Extraer sentimiento desde links
sentiment_links = []
for link in journalist_data["links"]:
    if link.get("role") == "participant" and "sentiment" in link:
        sentiment_links.append({
            "event_id": link.get("source"),
            "person_id": link.get("target"),
            "sentiment": link.get("sentiment")
        })

# Crear DataFrame de sentimientos
df_sentiment = pd.DataFrame(sentiment_links)

# Hacer merge con df_person_events para traer el sentimiento
df_with_sentiment = pd.merge(
    df_meeting_time,
    df_sentiment,
    on=["event_id", "person_id"],
    how="left"
)

# Agrupar para contar participaciones y mantener industry y sentimiento promedio por evento/persona
df_attendance_sentiment = df_with_sentiment.groupby(
    ["person_id", "event_name", "industry"]
).agg(
    attended=("event_id", "count"),
    avg_sentiment=("sentiment", "mean"),
    total_sentiment=("sentiment", "sum")
).reset_index()

df_attendance_sentiment

Unnamed: 0,person_id,event_name,industry,attended,avg_sentiment,total_sentiment
0,Carol Limpet,marine_life_deck,tourism,7,0.5,3.5
1,Carol Limpet,renaming_park_himark,tourism,5,0.5,2.5
2,Carol Limpet,seafood_festival,tourism,5,0.75,3.75
3,Carol Limpet,waterfront_market,tourism,4,1.0,4.0
4,Ed Helpsford,affordable_housing,other,7,1.0,7.0
5,Ed Helpsford,concert,tourism,4,0.5,2.0
6,Ed Helpsford,low_volume_crane,fishing,2,1.0,2.0
7,Ed Helpsford,name_harbor_area,fishing,2,0.0,0.0
8,Ed Helpsford,name_inspection_office,other,2,0.0,0.0
9,Ed Helpsford,waterfront_market,tourism,3,1.0,3.0


In [85]:
df_plot = df_attendance_sentiment.copy()

# Definir colores para el sentimiento (positivo, neutro, negativo)
def map_color(sentiment):
    if sentiment > 0.2:
        return "green"
    elif sentiment < -0.2:
        return "red"
    else:
        return "gray"

df_plot["color"] = df_plot["avg_sentiment"].apply(map_color)

# Definir formas para las industrias
symbol_map = {"turismo": "circle", "pesca": "square", "otra": "diamond"}
df_plot["symbol"] = df_plot["industry"].map(symbol_map)

# Crear gráfico con Plotly Express
fig = px.scatter(
    df_plot,
    x="person_id",
    y="event_name",
    size="attended",
    color="avg_sentiment",
    symbol="industry",
    color_continuous_scale=["red", "gray", "green"],
    labels={
        "person_id": "Miembro de COOTEFOO",
        "event_name": "Nombre del Evento",
        "attended": "Participaciones",
        "avg_sentiment": "Sentimiento Promedio",
        "industry": "Industria"
    },
    title="Sentimiento de los Miembros de COOTEFOO hacia las Industrias por Evento"
)

fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
fig.update_layout(xaxis_tickangle=-45)

fig.show()

In [86]:
import plotly.graph_objects as go

industry_order = {"fishing": 0, "other": 1, "tourism": 2}
df_plot["industry_sort"] = df_plot["industry"].map(industry_order)

# Sort the dataframe by industry first, then event_name
df_plot = df_plot.sort_values(by=["industry_sort", "event_name"], ascending=[True, True])

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df_plot["person_id"],
    y=df_plot["event_name"],
    mode="markers",
    marker=dict(
        size=df_plot["attended"] * 5,
        color=df_plot["color"],
        line=dict(color=df_plot["border_color"], width=3)
    ),
    text=df_plot["avg_sentiment"],
    hovertemplate="<b>%{y}</b><br>Participante: %{x}<br>Sentimiento: %{text}<extra></extra>"
))

fig.update_layout(
    title="Sentimiento de los miembros de COOTEFOO hacia los eventos",
    xaxis_title="Miembro de COOTEFOO",
    yaxis_title="Nombre del evento",
    xaxis_tickangle=-45,
    height=700
)

industry_legend = [
    {"name": "Turismo", "color": "goldenrod"},
    {"name": "Pesca", "color": "royalblue"},
    {"name": "Otra", "color": "lightgray"}
]

for entry in industry_legend:
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=dict(
            size=10,
            color='white',  # no fill
            line=dict(color=entry["color"], width=3),
            symbol='circle'
        ),
        name=f"{entry['name']}",
        legendgroup="industry",
        showlegend=True,
    ))

# Add sentiment legend (fill colors)
sentiment_legend = [
    {"name": "Positivo", "color": "green"},
    {"name": "Neutro", "color": "gray"},
    {"name": "Negativo", "color": "red"}
]

for entry in sentiment_legend:
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=dict(
            size=10,
            color=entry["color"],
            line=dict(color='black', width=0.5),
            symbol='circle'
        ),
        name=f"Sentimiento: {entry['name']}",
        legendgroup="sentiment",
        showlegend=True,
    ))

fig.show()


KeyError: 'border_color'

In [87]:
industry_order = {"fishing": 0, "other": 1, "tourism": 2}
df_plot = df_attendance_sentiment.copy()
df_plot["industry_order"] = df_plot["industry"].map(industry_order)
df_plot = df_plot.sort_values(by=["industry_order", "event_name"])

# Map colors for sentiment
def map_color(sentiment):
    if sentiment > 0.2:
        return "green"
    elif sentiment < -0.2:
        return "red"
    else:
        return "gray"
df_plot["color"] = df_plot["avg_sentiment"].apply(map_color)

# Map border colors for industry
border_color_map = {"tourism": "goldenrod", "fishing": "royalblue", "other": "lightgray"}
df_plot["border_color"] = df_plot["industry"].map(border_color_map)

# Create main scatter trace (invisible in legend)
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df_plot["person_id"],
    y=df_plot["event_name"],
    mode="markers",
    marker=dict(
        size=df_plot["attended"] * 5,
        color=df_plot["color"],
        line=dict(color=df_plot["border_color"], width=3)
    ),
    text=df_plot["avg_sentiment"],
    hovertemplate="<b>%{y}</b><br>Participante: %{x}<br>Sentimiento: %{text}<extra></extra>",
    showlegend=False
))

# Dummy title for industry legend
fig.add_trace(go.Scatter(
    x=[None], y=[None],
    mode='markers',
    marker=dict(size=0, opacity=0),
    legendgroup="industry",
    showlegend=True,
    name="<b>Industria</b>"
))

# Custom legend items for industry
industry_legend = [
    {"name": "Turismo", "color": "goldenrod"},
    {"name": "Pesca", "color": "royalblue"},
    {"name": "Otra", "color": "lightgray"}
]
for entry in industry_legend:
    fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(
            size=10,
            color='white',
            line=dict(color=entry["color"], width=3),
            symbol='circle'
        ),
        name=entry["name"],
        legendgroup="industry",
        showlegend=True
    ))

# Dummy title for sentiment legend
fig.add_trace(go.Scatter(
    x=[None], y=[None],
    mode='markers',
    marker=dict(size=0, opacity=0),
    legendgroup="sentiment",
    showlegend=True,
    name="<b>Sentimiento</b>"
))

# Custom legend items for sentiment
sentiment_legend = [
    {"name": "Positivo", "color": "green"},
    {"name": "Neutro", "color": "gray"},
    {"name": "Negativo", "color": "red"}
]
for entry in sentiment_legend:
    fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(
            size=10,
            color=entry["color"],
            line=dict(color='black', width=0.5),
            symbol='circle'
        ),
        name=entry["name"],
        legendgroup="sentiment",
        showlegend=True
    ))

fig.update_layout(
    title="Sentimiento de los miembros de COOTEFOO hacia las industrias por evento",
    xaxis_title="Miembro de COOTEFOO",
    yaxis_title="Nombre del evento",
    xaxis_tickangle=-45,
    height=800
)

fig.show()

In [94]:
import ipywidgets as widgets
import plotly.graph_objects as go

# Asumiendo df_plot ya creado con las columnas ['person_id','event_name','industry','avg_sentiment','attended','color','border_color']

# Mapeo de etiquetas en español a valores internos
industry_map = {
    "Todas": None,
    "Turismo": "tourism",
    "Pesca": "fishing",
    "Otro": "other"
}

# Función para actualizar el gráfico según selección
def plot_by_industry(selected):
    # Filtrar dataframe
    if industry_map[selected]:
        d = df_plot[df_plot["industry"] == industry_map[selected]]
    else:
        d = df_plot.copy()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=d["person_id"],
        y=d["event_name"],
        mode="markers",
        marker=dict(
            size=d["attended"] * 5,
            color=d["color"],
            line=dict(color=d["border_color"], width=3)
        ),
        hovertemplate=("<b>%{y}</b><br>"
                       "Participante: %{x}<br>"
                       "Sentimiento: %{text:.2f}<extra></extra>"),
        text=d["avg_sentiment"],
        showlegend=False
    ))

    # Añadir leyendas dummy (Industria y Sentimiento) según tu código previo
    # ... (mantener tu bloque de leyendas aquí) ...

    fig.update_layout(
        title=f"Sentimiento por evento (Filtro: {selected})",
        xaxis_title="Miembro de COOTEFOO",
        yaxis_title="Nombre del evento",
        xaxis_tickangle=-45,
        height=800
    )
    fig.show()

# Widget de selección
dropdown = widgets.Dropdown(
    options=list(industry_map.keys()),
    value="Todas",
    description="Tema:"
)

# Conectar widget y función
widgets.interact(plot_by_industry, selected=dropdown)

interactive(children=(Dropdown(description='Tema:', options=('Todas', 'Turismo', 'Pesca', 'Otro'), value='Toda…

In [93]:
fig.write_html("interactive_plot.html", include_plotlyjs="cdn")

# Tarea 3

In [None]:
#DOWNLOAD df_attendance_sentiment AS CSV
df_attendance_sentiment.to_csv("/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/Sentimientos_de_personas_por_evento.csv", index=False)

In [None]:
df_sentimientos = df_attendance_sentiment.copy()
trout_persons = {
    'Seal', 'Ed Helpsford', 'Teddy Goldstein'
}
filah_persons = {
    'Seal', 'Simone Kat', 'Carol Limpet'
}

# Crear columnas de pertenencia
df_sentimientos["in_trout"] = df_sentimientos["person_id"].isin(trout_persons)
df_sentimientos["in_filah"] = df_sentimientos["person_id"].isin(filah_persons)

# Etiquetas de presencia
df_sentimientos["dataset_presence"] = df_sentimientos.apply(
    lambda row: "Ambos" if row["in_trout"] and row["in_filah"]
    else ("Solo TROUT" if row["in_trout"]
          else ("Solo FILAH" if row["in_filah"] else "Ninguno")),
    axis=1
)

SyntaxError: incomplete input (<ipython-input-137-1877958781>, line 18)

In [None]:
#cargar Sentimientos_de_personas_ausentes_en_TROUT.csv
missing_from_trout = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/Sentimientos_de_personas_ausentes_en_TROUT.csv")
missing_from_trout

Unnamed: 0,person_name,industry,total_sentiment,participations
0,Carol Limpet,tourism,11.25,16
1,Carol Limpet,unknown,2.5,5
2,Simone Kat,large vessel,-5.8,10
3,Simone Kat,small vessel,4.5,6
4,Simone Kat,tourism,21.0,24
5,Simone Kat,unknown,0.0,2
6,Tante Titan,tourism,10.25,20
7,Tante Titan,unknown,34.0,34


In [None]:
#cargar Sentimientos_de_personas_ausentes_en_FILAH.csv
missing_from_filah = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/C3_Visualización_Información/Sentimientos_de_personas_ausentes_en_FILAH.csv")
missing_from_filah


Unnamed: 0,person_name,industry,total_sentiment,participations
0,Ed Helpsford,large vessel,7.0,7
1,Ed Helpsford,small vessel,2.0,2
2,Ed Helpsford,tourism,5.0,7
3,Ed Helpsford,unknown,0.0,4
4,Tante Titan,tourism,10.25,20
5,Tante Titan,unknown,34.0,34
6,Teddy Goldstein,large vessel,8.5,10
7,Teddy Goldstein,tourism,-3.0,6


In [None]:
fig_trout = px.bar(
    missing_from_trout,
    x="person_name",
    y="total_sentiment",
    color="industry",
    barmode="group",
    title="Sentimiento agregado por industria - Participantes ausentes en TROUT",
    labels={"person_name": "Participante", "total_sentiment": "Sentimiento agregado", "industry": "Industria"},
    color_discrete_map={
        "tourism": "goldenrod",
        "fishing": "royalblue",
        "other": "gray"
    }
)
fig_trout.update_layout(xaxis_tickangle=-45)

# Gráfico para ausentes en FILAH
fig_filah = px.bar(
    missing_from_filah,
    x="person_name",
    y="total_sentiment",
    color="industry",
    barmode="group",
    title="Sentimiento agregado por industria - Participantes ausentes en FILAH",
    labels={"person_name": "Participante", "total_sentiment": "Sentimiento agregado", "industry": "Industria"},
    color_discrete_map={
        "tourism": "goldenrod",
        "fishing": "royalblue",
        "other": "gray"
    }
)
fig_filah.update_layout(xaxis_tickangle=-45)

fig_trout.show()
fig_filah.show()