## Imports

In [1]:
import json
import re
import pandas as pd

## Defining Collection of Keywords

This collection of transport-related keywords was created with the use of the following documents:
- CH4LLENGE - Addressing Key Challenges of Sustainable Urban Mobility Planning (PDF, available in Materials as "3_ch4llenge_sump_glossary.pdf")
- EUROPEAN COMMISSION - Transport in the European Union - Current trends and issues (PDF, available in Materials as "transport in the european union-MI0224640ENN.pdf")
- Glossary for transport statistics - 5th edition, 2019 (PDF, avialable in Materials as "KS-GQ-19-004-EN-N.pdf")
- https://transport.ec.europa.eu/index_en
- https://eur-lex.europa.eu/summary/chapter/transport.html?root_default=SUM_1_CODED%3D32,SUM_2_CODED%3D3207&obsolete=false
- https://www.connectedautomateddriving.eu/glossary-and-taxonomies/glossary/


In [2]:
with open('data/keywords_full.json', 'r', encoding='utf-8') as f:
    keywords_full = set(json.load(f))

In [3]:
print(len(keywords_full))
print(list(keywords_full)[:20])

423
['public transport', 'Security for ships and port facilities', 'Road safety: EU database on road traffic accidents', 'Registration of persons on board passenger ships', 'Personal mobility', 'EU rail transport statistics', 'Automated vehicle', 'Minimal risk condition', 'EU emergency services: rolling out eCall infrastructure', 'Air pollutant emission', 'Deployment of alternative fuels infrastructure', 'Harmonisation of civil aviation requirements and procedures', 'Tactical functions of the DDT', 'Cooperative intelligent transport system', 'Lane marking', 'Taxi service', 'Port facilities for waste from ships', 'Monitoring, reporting and verification of ships’ emissions', 'Public transport app', 'Remote driving']


## Data Import

In [4]:
pd.reset_option("display.max_colwidth")

In [5]:
# use this if you do not want to incldue results from topic modeling
# combined_terms_df = pd.read_csv("data/csv/combined_terms.csv")

# choose the appropriate file depending on which topic modeling results you want to include
# combined_terms_with_top2vec.csv has results from all models if the entire pipeline was run
combined_terms_df = pd.read_csv("data/csv/combined_terms_with_top2vec.csv")
combined_terms_df.head()

Unnamed: 0,date,topic,text,term,clean_text,bertopic_topic,bertopic_prob,is_bertopic,lda_topic,lda_prob,is_lda,nmf_topic,nmf_prob,is_nmf,top2vec_topic,top2vec_score,is_top2vec
0,07-05-1999,1. VOTES,President . – We shall now move on to the vote...,94_99,shall votes regulation ecsc ec euratom incorpo...,11,0.155931,False,85,0.246812,False,99,0.021814,False,16,0.836422,False
1,07-05-1999,2. Potato starch,President . – The next item is the proposal fo...,94_99,regulation amending regulation ec establishing...,4,0.273608,False,17,0.404205,False,45,0.01447,False,6,0.790973,False
2,07-05-1999,3. Hannover 2000,President . – The next item is the report (A4‐...,94_99,hoppenstedt culture youth education media comm...,39,0.194263,False,62,0.431773,False,78,0.042771,False,69,0.67304,False
3,07-05-1999,4. European textiles market,President . – The next item is the joint debat...,94_99,following b ferrer peijs chanterie group peopl...,235,1.0,False,85,0.289002,False,99,0.056262,False,180,0.908692,False
4,06-05-1999,1. Approval of the Minutes,President . – The Minutes of yesterday's sitti...,94_99,distributed spencer ppe chairman foreign affai...,-1,0.0,False,85,0.670585,False,99,0.074247,False,10,0.696468,False


In [6]:
print(combined_terms_df.shape)

(23408, 17)


## Keyword Matching

### Full keywords (including general)

In [7]:
def norm(s: str) -> str:
    s = s.lower()
    s = re.sub(r"[’'`]", " ", s)
    s = re.sub(r"[^a-z\s]", " ", s)
    return re.sub(r"\s+", " ", s).strip()

In [8]:
def get_pattern(keywords):
    # prepare keywords
    norm_kw = {norm(k) for k in keywords if k.strip()}
    pattern = re.compile(r"(?:%s)" % "|".join(map(re.escape, sorted(norm_kw, key=len, reverse=True))))

    return pattern

In [9]:
pattern_full = get_pattern(keywords_full)


In [10]:
# def filter_transport_rows(df, pattern, min_keyword_count=10):
#     df = df.copy()
#     df["norm_text"] = (df["topic"].fillna("") + " " + df["text"].fillna("")).map(norm)
#     df["matched_keywords"] = df["norm_text"].str.findall(pattern)
    
#     df["matched_keywords"] = df["matched_keywords"].apply(lambda xs: sorted(set(xs)))  # remove duplicates and sort
    
#     # flag only rows with >= min_keyword_count unique keyword hits
#     df["is_transport"] = df["matched_keywords"].apply(lambda x: len(x) >= min_keyword_count)
#     df["match_count"] = df["matched_keywords"].str.len()
    
#     transport_df = df.loc[df["is_transport"], ["date", "topic", "text", "term", "matched_keywords", "match_count"]]
    
#     return transport_df

In [11]:
def filter_transport_rows(df, pattern, min_keyword_count=10):
    df = df.copy()
    
    df["norm_text"] = (df["topic"].fillna("") + " " + df["text"].fillna("")).apply(norm)
    df["matched_keywords"] = df["norm_text"].str.findall(pattern)
    
    # use list comprehension instead of apply for better performance
    df["matched_keywords"] = [sorted(set(x)) for x in df["matched_keywords"]]
    
    df["match_count"] = df["matched_keywords"].str.len()
    df["is_transport"] = df["match_count"] >= min_keyword_count
    
    transport_df = df.loc[df["is_transport"], ["date", "topic", "text", "term", "matched_keywords", "match_count"]]
    
    return transport_df

In [12]:
transport_df = filter_transport_rows(combined_terms_df, pattern_full)

In [13]:
transport_df.head()

Unnamed: 0,date,topic,text,term,matched_keywords,match_count
7,06-05-1999,4. VOTES,Confirmation of first or second reading: ‐ Pro...,94_99,"[airport, bus, car, driver, infrastructure, pa...",14
20,05-05-1999,8. Agenda 2000 (continuation),President . – The next item is the continuatio...,94_99,"[bus, car, infrastructure, manoeuvre, railway,...",11
30,04-05-1999,4. VOTES,"President . – Ladies and gentlemen, we shall n...",94_99,"[bus, car, infrastructure, road, ship, sustain...",10
49,03-05-1999,9. General economic policy guidelines – Europe...,President . – The next item is the joint debat...,94_99,"[bus, car, infrastructure, manoeuvre, road, sh...",10
63,15-04-1999,4. Transport infrastructure charging,President . – The next item is the report (A4‐...,94_99,"[bus, car, infrastructure, rail, road, sustain...",13


In [14]:
transport_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2845 entries, 7 to 23381
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   date              2845 non-null   object
 1   topic             2845 non-null   object
 2   text              2845 non-null   object
 3   term              2845 non-null   object
 4   matched_keywords  2845 non-null   object
 5   match_count       2845 non-null   int64 
dtypes: int64(1), object(5)
memory usage: 155.6+ KB


In [15]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [16]:
transport_df.sort_values("match_count", ascending=False)[
    ["date", "topic", "matched_keywords", "match_count"]
].head(10)

Unnamed: 0,date,topic,matched_keywords,match_count
12998,06-07-2010,7. Explanations of vote,"[air transport, airport, boat, bus, bus and coach passengers rights, car, driver, freight transport, greenhouse, greenhouse gas emission, infrastructure, inland waterway transport, intelligent transport system, lane, logistics, maritime transport, passenger, passenger car, passenger transport, pedestrian, pilot, public transport, rail, rail transport, railway, reporting formalities for ships, rights of passengers travelling by sea and inland waterways, road, road safety, road transport, road user, ship, sustainability, sustainable transport, sustainable transport system, sustainable urban mobility plan, traffic, train, tram, trans european transport network, transport, transport infrastructure, transport mode, transport system, traveller, truck, urban mobility, vehicle]",48
11820,15-12-2011,10. Explanations of vote,"[air transport, airport, boat, bus, car, driver, freight transport, goods logistics, greenhouse gas emission, infrastructure, logistics, manoeuvre, maritime transport, passenger, passenger transport, pedestrian, pilot, public transport, rail, rail transport, railway, railway transport, risk factor, road, road safety, road transport, roadmap to a single european transport area towards a competitive and resource efficient transport system, ship, single european transport area, sustainability, sustainable mobility, sustainable transport, traffic, train, tram, trans european transport network, transport, transport infrastructure, transport mode, transport system, traveller, triple, truck, urban mobility, vehicle, vessel]",46
14558,12-09-2018,18. Europe on the Move: an agenda for the future of mobility in the EU (debate),"[automated mobility, automated vehicle, bus, car, connected mobility, connected vehicle, driver, freight transport, greenhouse gas emission, infrastructure, intelligent transport system, lane, logistics, mobility as a service, multimodal mobility, passenger, passenger transport, pedestrian, pilot, public transport, rail, railway, road, road safety, road transport, road user, shared mobility, ship, smart mobility, sustainability, sustainable mobility, sustainable transport, traffic, train, trans european transport network, transport, transport infrastructure, transport mode, transport system, trip, triple, truck, urban mobility, vehicle]",44
11407,03-07-2012,7. Explanations of vote,"[air transport, bus, car, driver, freight transport, greenhouse gas emission, infrastructure, intelligent transport system, lane, manoeuvre, maritime transport, motorcycle, passenger, passenger transport, pilot, rail, rail transport, railway, railway transport, remote surveillance, road, road network, road safety, road transport, road user, road vehicle, roadside, ship, sustainability, sustainable transport, tachographs in road transport, takeover, traffic, traffic management, train, trans european transport network, transport, transport infrastructure, transport mode, transport system, traveller, trip, truck, vehicle]",44
12057,27-09-2011,9. Explanations of vote,"[agreements on air services, air transport, aircraft, airport, boat, bus, car, cross border transportation of euro cash by road, driver, greenhouse, greenhouse gas emission, infrastructure, intelligent speed assistance, lane, logistics, manoeuvre, passenger, pedestrian, pilot, rail transport, railway, road, road network, road safety, road transport, road user, roadside, ship, sustainability, traffic, traffic management, train, tram, trans european transport network, transport, transport infrastructure, transport system, traveller, trip, truck, vehicle, vehicle occupant, vessel, vulnerable road user]",44
17698,10-09-2015,9.3. Implementation of the 2011 White paper on transport (A8-0246/2015 - Wim van de Camp),"[air transport, airport, bus, car, driver, freight transport, greenhouse, greenhouse gas emission, infrastructure, intelligent transport system, logistics, maritime transport, passenger, passenger car, passenger transport, pedestrian, public transport, rail, rail transport, railway, road, road network, road safety, road transport, road user, ship, single european transport area, sustainability, sustainable mobility, sustainable transport, traffic, train, trans european transport network, transport, transport infrastructure, transport mode, transport system, urban mobility, vehicle, vessel]",40
17755,08-09-2015,16. Implementation of the 2011 White paper on transport (debate),"[air transport, airplane, airport, bus, car, driver, greenhouse, greenhouse gas emission, infrastructure, intelligent transport system, maritime transport, motorcycle, passenger, passenger car, pilot, public transport, rail, rail transport, railway, railway transport, road, road safety, road transport, ship, single european transport area, sustainability, sustainable mobility, traffic, traffic management, train, tram, trans european transport network, transport, transport infrastructure, transport mode, transport system, truck, urban mobility, vehicle]",39
22723,17-06-2025,"18. EU framework conditions for competitive, efficient and sustainable public transport services at all levels (debate)","[aircraft, airplane, airport, bus, car, driver, greenhouse gas emission, infrastructure, lane, logistics, maritime transport, passenger, passenger car, public transport, rail, rail transport, railway, railway transport, road, shared mobility, ship, sustainability, sustainable mobility, sustainable transport, sustainable transport system, sustainable urban mobility plan, traffic, train, tram, trans european transport network, transport, transport infrastructure, transport mode, transport system, trip, truck, urban mobility, urban mobility framework, vehicle]",39
17395,02-12-2015,20.5. Sustainable urban mobility (A8-0319/2015 - Karima Delli),"[bus, car, conventional vehicle, driver, freight transport, greenhouse, greenhouse gas emission, infrastructure, lane, logistics, passenger, passenger car, pedestrian, public transport, rail, railway, road, road safety, road user, ship, smart mobility, sustainability, sustainable mobility, sustainable transport, sustainable urban mobility plan, traffic, traffic volume, train, tram, transport, transport equipment, transport infrastructure, transport mode, transport sharing, transport system, trip, urban mobility, urban mobility in the eu, vehicle]",39
6087,10-03-2008,"20. Sustainable European transport policy, taking into account European energy and environment policies (debate)","[air transport, airport, bus, car, environmental impact of transport, freight transport, greenhouse, greenhouse gas emission, infrastructure, inland waterway transport, intelligent transport system, intermodal freight transport, logistics, maritime transport, passenger, public transport, rail, rail transport, railway, road, road network, road transport, road user, ship, sustainable mobility, sustainable transport, sustainable transport system, the transport protocol of the alpine convention, traffic, traffic management, train, tram, trans european transport network, transport, transport infrastructure, transport system, urban mobility, vehicle]",38


In [17]:
pd.reset_option("display.max_colwidth")

In [18]:
transport_df.to_csv("data/csv/transport.csv", index=False)

In [19]:
# we can use the indices from transport_df to set values for the is_full column in the original dataframe
# transport_df.index.tolist()

In [20]:
# add is_full column to combined_terms_df based on transport_df
combined_terms_df['is_full'] = combined_terms_df.index.isin(transport_df.index)

### Shortened keyword list

In [21]:
with open('data/keywords_shortened.json', 'r', encoding='utf-8') as f:
    keywords_shortened = set(json.load(f))

In [22]:
print(len(keywords_shortened))
print(list(keywords_shortened)[:20])

358
['public transport', 'Security for ships and port facilities', 'Road safety: EU database on road traffic accidents', 'Registration of persons on board passenger ships', 'Personal mobility', 'EU rail transport statistics', 'Minimal risk condition', 'EU emergency services: rolling out eCall infrastructure', 'Air pollutant emission', 'Deployment of alternative fuels infrastructure', 'Harmonisation of civil aviation requirements and procedures', 'Tactical functions of the DDT', 'Cooperative intelligent transport system', 'Lane marking', 'Taxi service', 'Port facilities for waste from ships', 'Monitoring, reporting and verification of ships’ emissions', 'Public transport app', 'Remote driving', 'Pipeline transport']


In [23]:
# words that have been removed from the full list
removed_keywords = keywords_full - keywords_shortened
print(len(removed_keywords))
print(list(removed_keywords))

65
['Goods logistics', 'Driving behaviour', 'bus', 'Human-machine interaction', 'Special purpose vehicle', 'Approaching vehicle', 'Test run', 'Electronic stability control', 'Road', 'vehicle', 'Infrastructure', 'Vehicle', 'Automated vehicle', 'transport', 'ship', 'aircraft', 'Sustainability', 'Passenger', 'Logistics', 'Roundabout', 'car', 'Risk factor', 'Trip', 'Roadside', 'Motorcycle', 'Hazard analysis and risk assessment', 'Bus', 'Lane', 'Proving ground', 'Traffic', 'Fleet vehicle', 'Connected vehicle', 'Crash avoidance features', 'Pedestrian', 'boat', 'road', 'Testing infrastructure', 'Test scenario', 'Truck', 'railway', 'Triple', 'Road user', 'In-service monitoring', 'Driver', 'rail', 'airport', 'Carriageway', 'Pilot', 'greenhouse', 'tram', 'Intersection', 'Road vehicle', 'vessel', 'airplane', 'Functional safety', 'Roadway', 'Driving dynamics', 'Bicyclist', 'Travel behaviour', 'train', 'Operational safety', 'Human-machine interface', 'Traveller', 'Conventional vehicle', 'Manoeuvre'

In [24]:
pattern_shortened = get_pattern(keywords_shortened)

In [25]:
transport_df_shortened = filter_transport_rows(combined_terms_df, pattern_shortened, min_keyword_count=7) # use less strict criteria for shortened list

In [26]:
transport_df_shortened.head()

Unnamed: 0,date,topic,text,term,matched_keywords,match_count
71,15-04-1999,12. VOTES (continuation),"Report (A4‐0136/99) by Mrs Peijs, on behalf of...",94_99,"[air transport, passenger car, road network, r...",7
87,13-04-1999,7. Working time,President . – The next item is the joint debat...,94_99,"[maritime transport, passenger transport, publ...",8
134,10-03-1999,6. VOTES,"Report (A4‐0063/99) by Mrs Hardstaff, on behal...",94_99,"[air transport, freight transport, passenger t...",10
158,09-03-1999,16. Railways,President . – The next item is the joint debat...,94_99,"[air transport, freight transport, passenger c...",10
177,24-02-1999,7. Harmonisation of social legislation on tran...,President . – The next item is the report (A4‐...,94_99,"[air transport, inland waterway transport, mar...",8


In [27]:
transport_df_shortened.info()

<class 'pandas.core.frame.DataFrame'>
Index: 187 entries, 71 to 23242
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   date              187 non-null    object
 1   topic             187 non-null    object
 2   text              187 non-null    object
 3   term              187 non-null    object
 4   matched_keywords  187 non-null    object
 5   match_count       187 non-null    int64 
dtypes: int64(1), object(5)
memory usage: 10.2+ KB


In [28]:
pd.set_option("display.max_colwidth", None)

In [29]:
transport_df_shortened.sort_values("match_count", ascending=False)[
    ["date", "topic", "matched_keywords", "match_count"]
].head(10)

Unnamed: 0,date,topic,matched_keywords,match_count
12998,06-07-2010,7. Explanations of vote,"[air transport, bus and coach passengers rights, freight transport, greenhouse gas emission, inland waterway transport, intelligent transport system, maritime transport, passenger car, passenger transport, public transport, rail transport, reporting formalities for ships, rights of passengers travelling by sea and inland waterways, road safety, road transport, sustainable transport, sustainable transport system, sustainable urban mobility plan, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",23
6087,10-03-2008,"20. Sustainable European transport policy, taking into account European energy and environment policies (debate)","[air transport, environmental impact of transport, freight transport, greenhouse gas emission, inland waterway transport, intelligent transport system, intermodal freight transport, maritime transport, public transport, rail transport, road network, road transport, sustainable mobility, sustainable transport, sustainable transport system, the transport protocol of the alpine convention, traffic management, trans european transport network, transport infrastructure, transport system, urban mobility]",21
11407,03-07-2012,7. Explanations of vote,"[air transport, freight transport, greenhouse gas emission, intelligent transport system, maritime transport, passenger transport, rail transport, railway transport, remote surveillance, road network, road safety, road transport, sustainable transport, tachographs in road transport, takeover, traffic management, trans european transport network, transport infrastructure, transport mode, transport system]",20
14558,12-09-2018,18. Europe on the Move: an agenda for the future of mobility in the EU (debate),"[automated mobility, connected mobility, freight transport, greenhouse gas emission, intelligent transport system, mobility as a service, multimodal mobility, passenger transport, public transport, road safety, road transport, shared mobility, smart mobility, sustainable mobility, sustainable transport, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",20
17698,10-09-2015,9.3. Implementation of the 2011 White paper on transport (A8-0246/2015 - Wim van de Camp),"[air transport, freight transport, greenhouse gas emission, intelligent transport system, maritime transport, passenger car, passenger transport, public transport, rail transport, road network, road safety, road transport, single european transport area, sustainable mobility, sustainable transport, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",20
11820,15-12-2011,10. Explanations of vote,"[air transport, freight transport, greenhouse gas emission, maritime transport, passenger transport, public transport, rail transport, railway transport, road safety, road transport, roadmap to a single european transport area towards a competitive and resource efficient transport system, single european transport area, sustainable mobility, sustainable transport, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",19
13014,05-07-2010,18. A sustainable future for transport (debate),"[air transport, freight transport, inland waterway transport, intelligent transport system, maritime transport, personal mobility, public transport, rail transport, road network, road safety, road transport, sustainable transport, sustainable transport system, traffic management, trans european transport network, transport equipment, transport infrastructure, transport system, urban mobility]",19
17755,08-09-2015,16. Implementation of the 2011 White paper on transport (debate),"[air transport, greenhouse gas emission, intelligent transport system, maritime transport, passenger car, public transport, rail transport, railway transport, road safety, road transport, single european transport area, sustainable mobility, traffic management, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",18
16372,25-10-2016,8.18. Improving connection and accessibility of transport infrastructure in Central and Eastern Europe (A8-0282/2016 - Tomasz Piotr Poręba),"[air transport, freight transport, inland waterway transport, passenger mobility, passenger transport, personal mobility, public transport, rail transport, railway transport, road network, road safety, road transport, sustainable transport, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",18
11838,14-12-2011,16. Single European transport area (debate),"[air transport, freight transport, greenhouse gas emission, passenger transport, public transport, rail transport, road safety, road transport, roadmap to a single european transport area towards a competitive and resource efficient transport system, single european transport area, sustainable mobility, sustainable transport, traffic management, trans european transport network, transport infrastructure, transport mode, transport system, urban mobility]",18


In [30]:
pd.reset_option("display.max_colwidth")

In [31]:
transport_df_shortened.to_csv("data/csv/transport_shortened.csv", index=False)

In [32]:
# add is_shortened column to combined_terms_df based on transport_shortened
combined_terms_df['is_shortened'] = combined_terms_df.index.isin(transport_df_shortened.index)

In [36]:
# also add keyword lists from both approaches to final dataframe

keywords_full_dict = transport_df['matched_keywords'].to_dict()
keywords_shortened_dict = transport_df_shortened['matched_keywords'].to_dict()

combined_terms_df['keywords_full'] = [keywords_full_dict.get(idx, []) for idx in combined_terms_df.index]
combined_terms_df['keywords_shortened'] = [keywords_shortened_dict.get(idx, []) for idx in combined_terms_df.index]

In [38]:
# show some rows where either is_full or is_shortened is True
combined_terms_df[combined_terms_df['is_full'] | combined_terms_df['is_shortened']].head()

Unnamed: 0,date,topic,text,term,clean_text,bertopic_topic,bertopic_prob,is_bertopic,lda_topic,lda_prob,...,nmf_topic,nmf_prob,is_nmf,top2vec_topic,top2vec_score,is_top2vec,is_full,is_shortened,keywords_full,keywords_shortened
7,06-05-1999,4. VOTES,Confirmation of first or second reading: ‐ Pro...,94_99,confirmation directive amending directive eec ...,11,1.0,False,37,0.21525,...,99,0.026774,False,16,0.843454,False,True,False,"[airport, bus, car, driver, infrastructure, pa...",[]
20,05-05-1999,8. Agenda 2000 (continuation),President . – The next item is the continuatio...,94_99,continuation hnsch pse wish achieve objectives...,-1,0.0,False,62,0.387022,...,49,0.026635,False,24,0.786473,False,True,False,"[bus, car, infrastructure, manoeuvre, railway,...",[]
30,04-05-1999,4. VOTES,"President . – Ladies and gentlemen, we shall n...",94_99,shall proceed long list votes contains new ele...,11,1.0,False,37,0.242227,...,11,0.044343,False,16,0.827787,False,True,False,"[bus, car, infrastructure, road, ship, sustain...",[]
49,03-05-1999,9. General economic policy guidelines – Europe...,President . – The next item is the joint debat...,94_99,following reports fourans economic monetary af...,228,1.0,False,47,0.680714,...,32,0.08457,False,13,0.791948,False,True,False,"[bus, car, infrastructure, manoeuvre, road, sh...",[]
63,15-04-1999,4. Transport infrastructure charging,President . – The next item is the report (A4‐...,94_99,schmidbauer transport tourism white paper enti...,64,1.0,True,19,0.388259,...,22,0.055263,True,15,0.826061,True,True,False,"[bus, car, infrastructure, rail, road, sustain...",[]


In [39]:
# save final dataframe
combined_terms_df.to_csv("data/csv/combined_terms_final.csv", index=False)