In [3]:
#load data
import yaml
import pandas as pd

with open('legislators-social-media.yaml') as f:
    data = yaml.safe_load(f)

# Convert nested YAML objects into a flat DataFrame
df_rep_media = pd.json_normalize(data)

df_rep_media.head()


Unnamed: 0,id.bioguide,id.thomas,id.govtrack,social.twitter,social.facebook,social.youtube_id,social.twitter_id,social.youtube,social.instagram,social.instagram_id,social.mastodon
0,R000600,2222,412664.0,RepAmata,aumuaamata,UCGdrLQbt1PYDTPsampx4t1A,3026623000.0,,,,
1,Y000064,2019,412428.0,SenToddYoung,SenatorToddYoung,UCuknj4PGn91gHDNAfboZEgQ,234128500.0,RepToddYoung,sentoddyoung,,
2,E000295,2283,412667.0,SenJoniErnst,senjoniernst,UCLwrmtF_84FIcK3TyMs4MIw,2856788000.0,,senjoniernst,1582703000.0,
3,T000476,2291,412668.0,SenThomTillis,SenatorThomTillis,UCUD9VGV4SSGWjGdbn37Ea2w,2964175000.0,,senthomtillis,,
4,W000809,1991,412402.0,Rep_SteveWomack,RepSteveWomack,UCXJbUDLYX-wGIhRuN66hqZw,234469300.0,CongressmanWomack,rep_stevewomack,,


In [9]:
from datetime import datetime

#load data with representative names and bioguide numbers

with open("legislators-current.yaml") as f:
    legislators = yaml.safe_load(f)

#filter for info under current term
def get_current_term(terms, as_of="2025-01-03"):
    ref = datetime.fromisoformat(as_of)
    current = None
    
    for t in terms:
        start = datetime.fromisoformat(t["start"])
        end = datetime.fromisoformat(t["end"])
        if start <= ref <= end:
            current = t
    
    # if none explicitly cover the date, take the last term
    if current is None:
        current = terms[-1]
    
    return current


In [15]:
#create df with info per legislator (rows) and add specific columns
rows = []

for leg in legislators:
    term = get_current_term(leg["terms"])  # this is their current/active term

    rows.append({
        "id.bioguide": leg["id"]["bioguide"],
        "first": leg["name"]["first"],
        "last": leg["name"]["last"],
        "current_type": term["type"],       # "sen" or "rep"
        "current_party": term["party"],     # e.g. "Republican", "Democrat"
        "current_state": term["state"],
        "term_start": term["start"],
        "term_end": term["end"],
    })

df_legs = pd.DataFrame(rows)
df_legs.head()

Unnamed: 0,id.bioguide,first,last,current_type,current_party,current_state,term_start,term_end
0,C000127,Maria,Cantwell,sen,Democrat,WA,2025-01-03,2031-01-03
1,K000367,Amy,Klobuchar,sen,Democrat,MN,2025-01-03,2031-01-03
2,S000033,Bernard,Sanders,sen,Independent,VT,2025-01-03,2031-01-03
3,W000802,Sheldon,Whitehouse,sen,Democrat,RI,2025-01-03,2031-01-03
4,B001261,John,Barrasso,sen,Republican,WY,2025-01-03,2031-01-03


In [14]:
len(df_legs)

539

In [17]:
#merge dataframes

df_legs_media = df_rep_media.merge(df_legs, on="id.bioguide", how="left")
df_legs_media.head()

Unnamed: 0,id.bioguide,id.thomas,id.govtrack,social.twitter,social.facebook,social.youtube_id,social.twitter_id,social.youtube,social.instagram,social.instagram_id,social.mastodon,first,last,current_type,current_party,current_state,term_start,term_end
0,R000600,2222,412664.0,RepAmata,aumuaamata,UCGdrLQbt1PYDTPsampx4t1A,3026623000.0,,,,,Aumua Amata,Radewagen,rep,Republican,AS,2025-01-03,2027-01-03
1,Y000064,2019,412428.0,SenToddYoung,SenatorToddYoung,UCuknj4PGn91gHDNAfboZEgQ,234128500.0,RepToddYoung,sentoddyoung,,,Todd,Young,sen,Republican,IN,2023-01-03,2029-01-03
2,E000295,2283,412667.0,SenJoniErnst,senjoniernst,UCLwrmtF_84FIcK3TyMs4MIw,2856788000.0,,senjoniernst,1582703000.0,,Joni,Ernst,sen,Republican,IA,2021-01-03,2027-01-03
3,T000476,2291,412668.0,SenThomTillis,SenatorThomTillis,UCUD9VGV4SSGWjGdbn37Ea2w,2964175000.0,,senthomtillis,,,Thom,Tillis,sen,Republican,NC,2021-01-03,2027-01-03
4,W000809,1991,412402.0,Rep_SteveWomack,RepSteveWomack,UCXJbUDLYX-wGIhRuN66hqZw,234469300.0,CongressmanWomack,rep_stevewomack,,,Steve,Womack,rep,Republican,AR,2025-01-03,2027-01-03


In [36]:
#drop unecessary columns
#df_legs_media = df_legs_media.drop(columns=["id.thomas", "id.govtrack", "social.mastodon"])

#reorganize columns
df_legs_media = df_legs_media[["first", "last", "id.bioguide", "current_type", "current_party", "current_state", "term_start", "term_end", "social.youtube", "social.youtube_id",  "social.twitter", "social.twitter_id", "social.instagram","social.instagram_id", "social.facebook"]]

df_legs_media.tail()

Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook
522,Sarah,McBride,M001238,rep,Democrat,DE,2025-01-03,2027-01-03,,,,,congresswomansarahmcbride,,CongresswomanSarahMcBride
523,Mark,Messmer,M001233,rep,Republican,IN,2025-01-03,2027-01-03,,,,,HouseRepublicans,,CongressmanMessmer
524,Josh,Riley,R000622,rep,Democrat,NY,2025-01-03,2027-01-03,,,,,RepRileyNY,,RepRileyNY
525,Jim,Justice,J000312,sen,Republican,WV,2025-01-14,2031-01-03,,,,,senjimjustice,,100044187231175
526,Lance,Gooden,G000589,rep,Republican,TX,2025-01-03,2027-01-03,UCaEs0pYlL_1cLlPBHfl0RIg,,,,,,


In [25]:
#save merged df as csv
df_legs_media.to_csv("df_legs_media.csv", index=False)

In [35]:
#load clean merged df 
clean_legs_media = pd.read_csv("clean_data/clean-df_legs_media.csv")

clean_legs_media.tail()

Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook
522,Sarah,McBride,M001238,rep,Democrat,DE,2025-01-03,2027-01-03,,,,,congresswomansarahmcbride,,CongresswomanSarahMcBride
523,Mark,Messmer,M001233,rep,Republican,IN,2025-01-03,2027-01-03,,,,,HouseRepublicans,,CongressmanMessmer
524,Josh,Riley,R000622,rep,Democrat,NY,2025-01-03,2027-01-03,,,,,RepRileyNY,,RepRileyNY
525,Jim,Justice,J000312,sen,Republican,WV,2025-01-14,2031-01-03,,,,,senjimjustice,,100044187231175
526,Lance,Gooden,G000589,rep,Republican,TX,2025-01-03,2027-01-03,,UCaEs0pYlL_1cLlPBHfl0RIg,,,,,


In [47]:
#new df filtering reps with youtube channels
df_reps_with_youtube = clean_legs_media[
    ((clean_legs_media["current_type"] == "rep") &  
     clean_legs_media["social.youtube_id"].notna()
    )
]

len(df_reps_with_youtube)

242

In [49]:
df_reps_with_youtube.tail()

Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook
447,Harriet,Hageman,H001096,rep,Republican,WY,2025-01-03,2027-01-03,@rephageman,UCYafjxwsbUrWxavqKPY4B-A,RepHageman,,,,
451,Celeste,Maloy,M001228,rep,Republican,UT,2025-01-03,2027-01-03,@RepCelesteMaloy,UCqHNoy1lbxNZhbMQUuxkb1Q,RepMaloyUtah,,RepCelesteMaloy,,61555755717517
453,Timothy,Kennedy,K000402,rep,Democrat,NY,2025-01-03,2027-01-03,@reptimkennedy,UCDB-BzTPkdKjZvWLdRa0lUA,reptimkennedy,,reptimkennedy,,reptimkennedy
469,Jeff,Hurd,H001100,rep,Republican,CO,2025-01-03,2027-01-03,,UCETJIbCBqsc2x9X1eT7g5bw,RepJeffHurd,,repjeffhurd,,61571570223582
526,Lance,Gooden,G000589,rep,Republican,TX,2025-01-03,2027-01-03,,UCaEs0pYlL_1cLlPBHfl0RIg,,,,,


In [50]:
#create column with youtube url using youtube ID indo
df_reps_with_youtube["youtube_url"] = df_reps_with_youtube["social.youtube_id"].apply(
    lambda x: f"https://www.youtube.com/channel/{x}" if pd.notna(x) else None
)

df_reps_with_youtube.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reps_with_youtube["youtube_url"] = df_reps_with_youtube["social.youtube_id"].apply(


Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook,youtube_url
0,Aumua Amata,Radewagen,R000600,rep,Republican,AS,2025-01-03,2027-01-03,,UCGdrLQbt1PYDTPsampx4t1A,RepAmata,3026623000.0,,,aumuaamata,https://www.youtube.com/channel/UCGdrLQbt1PYDT...
4,Steve,Womack,W000809,rep,Republican,AR,2025-01-03,2027-01-03,CongressmanWomack,UCXJbUDLYX-wGIhRuN66hqZw,Rep_SteveWomack,234469300.0,rep_stevewomack,,RepSteveWomack,https://www.youtube.com/channel/UCXJbUDLYX-wGI...
5,Frederica,Wilson,W000808,rep,Democrat,FL,2025-01-03,2027-01-03,repfredericawilson,UCP5QBhng_lHv-vJgE_h7lpA,RepWilson,234014100.0,repwilson,,RepWilson,https://www.youtube.com/channel/UCP5QBhng_lHv-...
6,Daniel,Webster,W000806,rep,Republican,FL,2025-01-03,2027-01-03,repdanwebster,UCCoX4VdU7U11eGEA0lbRtLw,RepWebster,281540700.0,,,RepWebster,https://www.youtube.com/channel/UCCoX4VdU7U11e...
8,Robert,Wittman,W000804,rep,Republican,VA,2025-01-03,2027-01-03,RobWittman,UCIqIb-OaTbkIdU426eYIdPg,RobWittman,15356410.0,reprobwittman,,RepRobWittman,https://www.youtube.com/channel/UCIqIb-OaTbkId...


In [51]:
#save df as csv 
df_reps_with_youtube.to_csv("df_reps_with_youtube.csv", index=False)

In [54]:
#data frame with democrats only
dems_youtube = df_reps_with_youtube[df_reps_with_youtube["current_party"] == "Democrat"]

dems_youtube.head()

Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook,youtube_url
5,Frederica,Wilson,W000808,rep,Democrat,FL,2025-01-03,2027-01-03,repfredericawilson,UCP5QBhng_lHv-vJgE_h7lpA,RepWilson,234014100.0,repwilson,,RepWilson,https://www.youtube.com/channel/UCP5QBhng_lHv-...
12,Debbie,Wasserman Schultz,W000797,rep,Democrat,FL,2025-01-03,2027-01-03,RepWassermanSchultz,UCfHQiJVvMlYbVAxrMSLdO4w,RepDWStweets,1140648000.0,RepDWSTweets,,RepDWS,https://www.youtube.com/channel/UCfHQiJVvMlYbV...
16,Maxine,Waters,W000187,rep,Democrat,CA,2025-01-03,2027-01-03,MaxineWaters,UCOQ2js1VYFlo74n7Ns8WsLQ,RepMaxineWaters,36686040.0,repmaxinewaters,,MaxineWaters,https://www.youtube.com/channel/UCOQ2js1VYFlo7...
18,Nydia,Velázquez,V000081,rep,Democrat,NY,2025-01-03,2027-01-03,nydiavelazquez,UCqMKPkKeeHFOOiZ9vxhXGVg,NydiaVelazquez,164369300.0,,,8037068318,https://www.youtube.com/channel/UCqMKPkKeeHFOO...
19,Paul,Tonko,T000469,rep,Democrat,NY,2025-01-03,2027-01-03,reppaultonko,UC3P2SGFmmuCYRsjeso0hOQQ,RepPaulTonko,84119350.0,reppaultonko,,reppaultonko,https://www.youtube.com/channel/UC3P2SGFmmuCYR...


In [55]:
len(dems_youtube)

120

In [56]:
#data frame with republicans only
gop_youtube = df_reps_with_youtube[df_reps_with_youtube["current_party"] == "Republican"]

gop_youtube.head()

Unnamed: 0,first,last,id.bioguide,current_type,current_party,current_state,term_start,term_end,social.youtube,social.youtube_id,social.twitter,social.twitter_id,social.instagram,social.instagram_id,social.facebook,youtube_url
0,Aumua Amata,Radewagen,R000600,rep,Republican,AS,2025-01-03,2027-01-03,,UCGdrLQbt1PYDTPsampx4t1A,RepAmata,3026623000.0,,,aumuaamata,https://www.youtube.com/channel/UCGdrLQbt1PYDT...
4,Steve,Womack,W000809,rep,Republican,AR,2025-01-03,2027-01-03,CongressmanWomack,UCXJbUDLYX-wGIhRuN66hqZw,Rep_SteveWomack,234469300.0,rep_stevewomack,,RepSteveWomack,https://www.youtube.com/channel/UCXJbUDLYX-wGI...
6,Daniel,Webster,W000806,rep,Republican,FL,2025-01-03,2027-01-03,repdanwebster,UCCoX4VdU7U11eGEA0lbRtLw,RepWebster,281540700.0,,,RepWebster,https://www.youtube.com/channel/UCCoX4VdU7U11e...
8,Robert,Wittman,W000804,rep,Republican,VA,2025-01-03,2027-01-03,RobWittman,UCIqIb-OaTbkIdU426eYIdPg,RobWittman,15356410.0,reprobwittman,,RepRobWittman,https://www.youtube.com/channel/UCIqIb-OaTbkId...
11,Tim,Walberg,W000798,rep,Republican,MI,2025-01-03,2027-01-03,RepWalberg,UChpf3_3Wn8f3qSJbsYXrhvg,RepWalberg,237863000.0,repwalberg,,RepWalberg,https://www.youtube.com/channel/UChpf3_3Wn8f3q...


In [57]:
len(gop_youtube)

122