# Flourish CSVs

1. Export from Python
2. Import to Flourish
3. Build Charts

_Formatting CSVs to match the Flourish templates._

In [None]:
import warnings
from pathlib import Path
import inspect
import sys
import plotly.express as px
import numpy as np
import pandas as pd
from ast import literal_eval
from collections import Counter
from itertools import chain

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Turn off pandas' chained-assignment check (None disables warn/raise).
pd.set_option("mode.chained_assignment", None)

In [4]:
# Location of the enriched article dataset, relative to this notebook.
path_to_file = "../scraped_articles_enriched_full.csv"

# Read the whole CSV into the DataFrame that every later cell works from.
df = pd.read_csv(filepath_or_buffer=path_to_file)

## Line Chart -> Loanword Density Over Time (2016 - 2024)

In [6]:
# For Slide 4, 11, 12
# Mean loanword density per year, flattened to two columns
# (year, loanword_density) and written out for the line chart.
yearly_density = df.groupby("year")["loanword_density"].mean()
df_line = yearly_density.reset_index()
df_line.to_csv("loanword_density_by_year.csv", index=False)

## Histogram -> Loanword Density Spread

_Histograms are not available on Flourish, so the densities are binned here and exported as per-bin article counts instead._

In [8]:
# For Slide 5
# Bucket each article's loanword density into 10 equal-width intervals;
# the bin label is stored on df as a new "density_bin" column.
df["density_bin"] = pd.cut(df["loanword_density"], bins=10)

# Count articles per bin, keep the bins in ascending interval order, and
# name the two output columns while turning the Series into a DataFrame.
bin_counts = df["density_bin"].value_counts().sort_index()
df_hist = bin_counts.rename_axis("density_bin").reset_index(name="article_count")
df_hist.to_csv("loanword_density_histogram.csv", index=False)

## Boxplot or Bar -> Topic vs. Loanword Usage

_Skipped, as only one domain uses topics._

In [10]:
# For Slide 6
# NOTE: this export is left commented out; the note above this cell says only
# one domain uses topic. Uncommented, it would write the mean loanword density
# per topic to "loanword_density_by_topic.csv".
# df_topic = df.groupby("topic")["loanword_density"].mean().reset_index()
# df_topic.to_csv("loanword_density_by_topic.csv", index=False)

## Sentiment Correlation -> Boxplot / Bar

In [12]:
# For Slide 7
# Mean loanword density per sentiment label, flattened to two columns
# (sentiment, loanword_density) and written out for the chart.
sentiment_means = df.groupby("sentiment")["loanword_density"].mean()
df_sentiment = sentiment_means.reset_index()
df_sentiment.to_csv("loanword_density_by_sentiment.csv", index=False)

## Top Loanwords -> Horizontal Bar Chart

In [13]:
# For Slide 8
def _parse_loanwords(cell):
    """Return the loanwords stored in one cell of the "loanwords" column.

    The column is loaded with ``pd.read_csv``, so a list written to the CSV
    comes back as its string representation (e.g. "['app', 'deal']") and an
    empty cell comes back as NaN. Iterating such a string directly would
    yield single characters, and iterating NaN raises ``TypeError``.

    Args:
        cell: Raw cell value: a string holding a Python list literal, an
            already-parsed list/tuple/set, or a missing value.

    Returns:
        A list of loanwords; empty when the cell is missing or cannot be
        parsed as a list literal.
    """
    if isinstance(cell, str):
        try:
            cell = literal_eval(cell)
        except (ValueError, SyntaxError):
            # Malformed cell: skip it rather than counting its characters.
            return []
    if isinstance(cell, (list, tuple, set)):
        return list(cell)
    # NaN / None / any other scalar carries no loanwords.
    return []


# Flatten every article's loanword list into one sequence of words.
all_words = list(chain.from_iterable(df["loanwords"].map(_parse_loanwords)))

# Keep the 20 most frequent loanwords as (word, count) pairs.
word_freq = Counter(all_words).most_common(20)
df_top = pd.DataFrame(word_freq, columns=["loanword", "frequency"])
df_top.to_csv("top_loanwords.csv", index=False)