# What Are the Dominant U.S. Legislative Narratives on China in 2025

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import numpy as np

In [2]:
# Top themes in how Congress talks about China
# What policy areas dominate
# What 'frames' Congress uses ('threat', 'competitator', 'partner')
# which memembers talk about China the most
# Differences between Democrats and Republicans (optional) 

## First get the TF-IDF score 

In [3]:
df = pd.read_pickle("congress2.pickle")
df['china_sentences']

840     [7002), as amended, the Minority Leader appoin...
316     [Congress has the power to enact this legislat...
1127    [(a) Select Committee on the Strategic Competi...
392     [The single subject of this legislation is: To...
213     [A bill to amend the Internal Revenue Code of ...
                              ...                        
925     [2960, a bill to develop economic tools to det...
713     [AMERICAN AI LEADERSHIP Committee on Foreign R...
413     [Congressional Record, Volume 171 Issue 201 (T...
263     [1005) to prohibit elementary and ] secondary ...
370     [Louis, Missouri, Chicago, Illinois, and Bowli...
Name: china_sentences, Length: 1198, dtype: object

In [4]:
df['china_sentences'][6]

["Ross' battalion of around 900 men was surrounded by 10 to 20,000 Chinese soldiers.",
 'After fighting the Chinese off for three days and nights, they ran out of food, water, and provisions.',
 'On the fourth day, the Chinese Army started shelling chemical rounds of white phosphorous.',
 'Leaving the cave, they were soon found and surrendered to the Chinese Army.']

In [5]:
df['china_sentences_one'] = df['china_sentences'].apply(lambda x: " ".join(x))

In [6]:
corpus = df['china_sentences_one'].fillna("").tolist()
# corpus must be a list of joined strings, where each element is one full document

In [7]:
# Build a TF-IDF model
vectorizer = TfidfVectorizer(
    stop_words = "english", 
    ngram_range = (1,2),
    max_df = 0.8, 
    min_df =5
)

In [8]:
# convert to an entire matrix 
X = vectorizer.fit_transform(corpus)

In [9]:
# Get all of the names
terms = vectorizer.get_feature_names_out()

In [10]:
#Get all of the TF-IDF scores for each term across all of the documents
# Adding up TF-IDF scores for all documents
scores = X.sum(axis=0).A1   # sum TF-IDF score of each term across all documents
top_idx = scores.argsort()[::-1][10:50]
for i in top_idx:
    print(terms[i], scores[i])

republic china 29.292460696881925
communist 23.35958407719912
russia 20.54685510512622
party 20.24061623553268
foreign 19.889242855926746
chinese communist 19.672228435751126
communist party 19.57615784195623
security 19.314209993555554
act 18.145066708900725
like 17.97278802660223
american 17.227323690256345
government 17.084613265466217
purposes 16.92317426112389
world 16.632532884933457
president 15.341904165860774
energy 14.628382132151565
military 14.107829499076926
secretary 13.937258264817752
trump 13.812161835273338
including 13.73218218653121
iran 13.717650726158583
america 13.595401614911124
countries 13.362104419673456
defense 12.890981238163185
state 12.297438906924796
department 11.500391865453368
tariffs 11.360313861129278
committee foreign 11.287412077290853
purposes committee 11.230010227217923
china russia 11.21218230028843
000 11.189579569369947
country 11.114612191592611
percent 11.005516539061588
prc 10.998390695614978
national 10.798416136441375
influence 10.751736

In [None]:
# adversary therefore 

In [11]:
keyword = "including"
results = []
for sentences in df['china_sentences']:
    matches = [s for s in sentences if keyword.lower() in s.lower()]
    results.extend(matches)   # add to final list
#results

In [63]:
output_path = "including.txt"

with open(output_path, "w", encoding="utf-8") as f:
    for sent in results:
        f.write(sent.strip() + "\n")

print(f"Saved {len(sentences)} sentences to {output_path}")

Saved 1 sentences to including.txt


In [69]:
# Find the most important terms for each document 

i = 809  #place the right index here
doc_vec = X[i].toarray().ravel()
top_idx = doc_vec.argsort()[::-1][:30]

for j in top_idx:
    print(terms[j], doc_vec[j])

000 0.5568029612847262
000 000 0.33974095935893905
counterfeit 0.2691331959048202
loss 0.2584173266748768
goods 0.21012984197643536
pills 0.1639870100360789
safety 0.1467613010701999
2024 0.12035986145464074
products 0.11324698645297968
enforcement administration 0.11317934165174554
drug enforcement 0.111155479431862
united states 0.10677512679170084
states 0.10599107695099967
united 0.10543750137480401
public 0.09764173426533354
year 0.09418985591199139
standards 0.09355451968815591
according 0.09207618083810548
drug 0.08764806922019167
tax 0.08358904873494863
fentanyl 0.079486519907711
enforcement 0.07673770656649323
economy 0.07520017221573363
administration 0.065446956734648
seizures 0.06048394520682841
adhere 0.06048394520682841
economy united 0.06048394520682841
301 0.06048394520682841
seized 0.0590034108001587
customs 0.057720913561919676


In [72]:
### Find what each speaker really cares 
speaker = "Cotton"
mask = df['name'] == speaker

X_s = X[mask.values]
scores_s = X_s.mean(axis=0).A1
top_idx = scores_s.argsort()[::-1][:20]

for j in top_idx:
    print(terms[j], scores_s[j])

finance 0.0939428714961939
warned 0.09332360231481962
federal funds 0.08958809840555525
republican 0.07815491337176829
ban 0.07655293255981492
manufactured people 0.07655293255981492
committee finance 0.07477235979025026
federal 0.07414728699767105
drugs 0.07110447897174825
colleagues 0.0702358722353367
party 0.06821214225426532
manufactured 0.06669221025868928
transactions 0.06272833471848924
drawing rights 0.06272833471848924
special drawing 0.06272833471848924
purchase 0.06157080590548686
committee 0.06013085373890652
party committee 0.05908091977604883
drawing 0.05908091977604883
china purposes 0.058596262818604826


In [74]:
# Find the difference between Democrats and Republicans 
D_mask = df['party'] == 'D'
R_mask = df['party'] == 'R'

scores_D = X[D_mask.values].mean(axis=0).A1
scores_R = X[R_mask.values].mean(axis=0).A1

diff_D = scores_D - scores_R
diff_R = scores_R - scores_D

In [142]:
# Find the distinct terms for the Democratic party
for j in diff_D.argsort()[::-1][:25]:
    print(terms[j], diff_D[j])

tariffs 0.013936227045154988
russia china 0.010443092182030756
russia 0.009413318845847535
chinese government 0.008670130933982516
nuclear 0.008065122349866656
ukraine 0.007547655087221138
percent 0.007015003856078329
step 0.006823882978033894
ties 0.006018509111240911
ties chinese 0.0058957517542544136
going 0.005462520022453395
usaid 0.005435405382174096
cede 0.00533483627458135
north korea 0.005131738967117519
north 0.005123142702657568
musk 0.005094916711252373
donald trump 0.0050796613639123465
donald 0.005073903313446815
abroad 0.005058591079955716
tariffs china 0.004909418887240399
did 0.00483381422284275
said 0.004793246524272481
chinese american 0.004781350675551483
clean 0.0047247936158545155
leadership 0.00468331389006906


In [None]:
# From the words reported by the Democratic Party, we can easily see that
# Democrats care about 
# tariffs with China (probably critize Trump for using tarrifs against China)
# the relationship between Russian and China 
# Ukrainian War
# North Korean and China 
# Chinese ties globally 
# Everything abroad (usaid)

# The Democratic Party cares more about what is happening on the global scale (the wo
# So the emphasis is very clear: diplomacy, global geopolitical structures, Russia-China Alignment, trade policy criticism, foreign aid, 
# and also clean energy 
# Thus, the Democrats frame China as a geopolitical + diplomatic threat 

In [153]:
# These are to be used to find the exact sentences that contain the keywords
keyword = "leadership"
results = []
for sentences in df_D['china_sentences']:
    matches = [s for s in sentences if keyword.lower() in s.lower()]
    results.extend(matches)   # add to final list
results

["The statement of the resolution, the core point of it, is that AID is central to advancing the national security of the United States because it mitigates threats abroad before they reach us here; it promotes global stability; and it addresses the root causes of migration and extremism and secures the leadership and influence of the United States in an era of strategic competition with the People's Republic of China.",
 'And as Russia and China have shown time and time and again, they are ready and waiting to fill the vacuum left by strong American leadership.',
 'In recent days, Musk has been busy illegally shuttering USAID, cutting off foreign assistant programs, which I said will lose jobs for Americans, lose lives in countries around the world, and lose leadership as adversaries like China fill that gap.',
 "Even if you don't care about what USAID does, even if you are content to let China take over development around the world and win over friends and mineral rights and turn our

In [178]:
# Find the distinct terms for the Republican party
for j in diff_R.argsort()[::-1][:30]:
    print(terms[j], diff_R[j])

committee 0.016270650078133716
communist 0.01283436971552979
chinese communist 0.01150371777756422
foreign 0.011136641356718813
communist party 0.010822758779054812
party 0.010185478900736983
people 0.008879227281584953
fentanyl 0.0088107993682605
purposes committee 0.008340137997207031
republic 0.007903086212719753
purposes 0.007515677591770754
people republic 0.0074750075156028144
oil 0.0072563099403829545
republic china 0.007256272076897502
defense 0.006118631523406595
debt 0.006009930667426763
critical 0.005970851846511953
committee foreign 0.0059264121709294276
colleagues 0.005539387719119636
fired 0.005515335211893832
chinese 0.005379324656035446
borrowing 0.005334934096671476
threat 0.005334610611407564
china purposes 0.005238306612476239
coal 0.0050523664822131695
manufacturing 0.004842336410835303
policy 0.004802954629232309
united states 0.004800391141144205
equipment 0.004700352590621614
certainly 0.00466573161855709


In [None]:
# emphasizes heavily on the China's Communistic nature -> strong ideological and value difference
# defense -> national security 
# debt -> economic coertion
# fentanyl -> narctoics supply
# oil, coal -> energy policy 

# Thus, the Repubican Party strongly cares about the what is happening that is directly affecting the United States as a country
# they care more about fired 
# The Republican strongly cares about the ideological nature of the Communist Party of China 
# The Republica party sees China as a threat overall: 
#an ideological threat(CCP), a security threat (defesne), a public health threat (fentanyl), economic threat: borrowing, debt, 
# also an energy threat (traditional): oil, coal, manufacturing threat: manufacturing, equipment
# basically everything that can threat the United States herself rather than what is going on in the world. 

In [180]:
# These are to be used to find the exact sentences that contain the keywords
keyword = "equipment"
results = []
for sentences in df_R['china_sentences']:
    matches = [s for s in sentences if keyword.lower() in s.lower()]
    results.extend(matches)   # add to final list
results

['During the COVID-19 pandemic, Taiwan generously offered to send face masks and personal protection equipment to the United States.',
 "So if China is determined to--let me try to use a more neutral word-- to undermine the United States of America, I could see where we would limit, want to limit, some of our goods such as, let's say, semiconductor design equipment.",
 'The following non-MDE items will also be included: communications equipment, including AN/PRC-160 and AN/PRC-167 radios and associated accessories and training; Defense Advanced Global Positioning System (GPS) Receivers (DAGRs); AN/PYQ-10 Simple Key Loaders; and other related elements of program and logistics support.',
 "(E) Because of the prevalence of United States manufacturing equipment in global semiconductor supply chains, nearly all chips produced worldwide, including in the People's Republic of China, are subject to United States export controls if destined for Russia or Belarus.",
 "(a) In General.--Not later 

In [None]:
# Some Very Interesting: 
# The Democrats care so much about America lagging behind China in terms of clean and renewable energies and threatened by how fast
# China's clean energy expands (solar, wind, electrical vehicles) 

# The Republicans linked the Chinese Communist Party with Tiktok and church activities including Zion churches
# oil from Iran, oil exports to China 
# US is concerned for China's economic expansion
# Us must defend itself from China's Tiktok, fentanyl, semiconductor supply chain