In [1]:
import pandas as pd
import datetime


# Preparing dataframes for analysis

In [2]:
# Load the 2022 crossword clue/answer export. The CSV carries an
# 'Unnamed: 0' column (presumably an index written by an earlier to_csv --
# confirm), which is not needed for analysis and is dropped here.
df = pd.read_csv("crossword_year_2022.csv").drop(columns=['Unnamed: 0'])
df

Unnamed: 0,crossword_date,answers,clues,crossword_year
0,2022-01-01,WINNER,First person?,2022
1,2022-01-01,PLANBS,Backups,2022
2,2022-01-01,WHOOPI,First name in daytime talk,2022
3,2022-01-01,MAOSUIT,Red Guard's attire,2022
4,2022-01-01,IAMBIC,"Like ""To be or not to be""",2022
...,...,...,...,...
15765,2022-07-08,STOAT,Source of some fur,2022
15766,2022-07-08,KONA,Hawaii's ___ Coast,2022
15767,2022-07-08,FELL,Cut down,2022
15768,2022-07-08,ERRS,Isn't right,2022


In [6]:
# Load the second clue/answer export and drop its leftover 'Unnamed: 0'
# column. NOTE(review): the file name says "slang till 2015", but the frame
# is used here as general clue/answer history -- confirm its contents.
df2 = pd.read_csv("slangtill2015.csv").drop(columns='Unnamed: 0')

# Stack the 2022 rows (df) underneath df2 and renumber the index from 0.
# pd.concat replaces DataFrame.append, which is deprecated and was removed
# in pandas 2.0.
df3 = pd.concat([df2, df], ignore_index=True)
df3

  df3 = df2.append(df, ignore_index=True)


Unnamed: 0,crossword_date,answers,clues,crossword_year
0,1976-01-05,AJAX,Greek hero,1976
1,1976-01-05,WHOOP,Loud cry,1976
2,1976-01-05,CRAB,Peevish person,1976
3,1976-01-05,BALI,Romantic isle,1976
4,1976-01-05,HEAVE,Lift,1976
...,...,...,...,...
1243277,2022-07-08,STOAT,Source of some fur,2022
1243278,2022-07-08,KONA,Hawaii's ___ Coast,2022
1243279,2022-07-08,FELL,Cut down,2022
1243280,2022-07-08,ERRS,Isn't right,2022


### df_answers_clues is a dataframe with XW puzzle data from 1976 through early July 2022
To reduce the load/scrape time, I included only three columns: answers, clues, and crossword date. Several other attributes are available for analysis.

In [7]:
# Load the 2018-2022 export and drop its leftover 'Unnamed: 0' column.
df_2018 = pd.read_csv(
    "crossword_year_2018_2022 - crossword_year_2018_2022.csv"
).drop(columns='Unnamed: 0')

# Combine it with df3, order the rows by crossword_year, and remove rows
# that are exact duplicates (the two sources presumably overlap for recent
# years -- confirm). pd.concat replaces DataFrame.append, which is
# deprecated and was removed in pandas 2.0. The original index labels from
# the concatenated frame are kept after sorting, so they are not contiguous.
df_answers_clues = (
    pd.concat([df_2018, df3], ignore_index=True)
    .sort_values('crossword_year')
    .drop_duplicates()
)
df_answers_clues

  df_answers_clues = df_2018.append(df3, ignore_index=True)


Unnamed: 0,crossword_date,answers,clues,crossword_year
237137,1976-10-16,EASTEND,Part of a city,1976
233337,1976-09-02,OMANI,Muscat native,1976
233336,1976-09-02,ARS,___ poetica,1976
233335,1976-09-02,ICAME,"What ""veni"" means",1976
233334,1976-09-02,SAWIN,Greeted at the door,1976
...,...,...,...,...
1445147,2022-03-04,KAT,Kit ___ bar,2022
1445148,2022-03-05,MOSS,Much of Iceland's greenery,2022
1445149,2022-03-05,PROVEIT,"""I don't believe you""",2022
1445151,2022-03-05,FRIENDLYBANTER,Needle exchange?,2022


### Slang is a dataframe that I created with most of the words NYT editors considered slang. 

Modifiers in the clue that I used to collect these words are:
colloquially, modern lingo, slang, informally, slangily, in texts

In [9]:
# Clue modifiers that mark an answer as slang. Each is matched as a plain
# (non-regex), case-sensitive substring of the clue text.
slang_modifiers = ['in texts', 'colloquially', 'modern lingo', 'slang', 'informally', 'slangily']

clue_text = df_answers_clues['clues']

# OR the per-modifier masks together, left to right, in list order.
is_slang_clue = clue_text.str.contains(slang_modifiers[0], regex=False)
for modifier in slang_modifiers[1:]:
    is_slang_clue = is_slang_clue | clue_text.str.contains(modifier, regex=False)

slang = df_answers_clues[is_slang_clue]
slang

Unnamed: 0,crossword_date,answers,clues,crossword_year
252416,1977-04-10,LEGGO,"68 Across, slangily",1977
284758,1978-04-21,SCHMO,"Sap, in slanguage",1978
286369,1978-05-09,OKAY,"Concurrence, colloquially",1978
281906,1978-03-19,LIBBER,"Feminist, colloquially",1978
320085,1979-09-02,ERK,"R.A.F. underling, informally",1979
...,...,...,...,...
1444957,2022-03-02,DJS,"Club workers, informally",2022
1445043,2022-03-03,SESH,"Meeting, informally",2022
1444857,2022-03-01,ALUMS,"Reunion attendees, informally",2022
1444908,2022-03-01,BRB,"""Hang on a sec,"" in texts",2022


<b> Tracking the lineage of a term's definition by looking at XWord Clues across time </b>

In [16]:
# Pull every clue ever used for the answer 'UNPACK' so the wording can be
# compared across years.
is_unpack = df_answers_clues['answers'] == 'UNPACK'
frequent_answers = df_answers_clues.loc[is_unpack]
frequent_answers

Unnamed: 0,crossword_date,answers,clues,crossword_year
1004607,2001-10-07,UNPACK,What to do after a vacation,2001
1042191,2003-01-04,UNPACK,Empty boxes,2003
162783,2020-05-15,UNPACK,"Work through, as feelings",2020
189727,2021-04-04,UNPACK,"Emotionally process, in modern lingo",2021
192124,2021-05-02,UNPACK,Take care of some personal baggage,2021
1451093,2022-05-15,UNPACK,"Attempt to grasp, as a complicated situation",2022


<b> How modern is Shortz's modern slang? </b>
<br/> Looking at the answers (slang terms) that he described as modern

In [None]:
# Keep only clues explicitly flagged as "modern lingo" or "modern slang"
# (plain substring match) and save them to modern_slang.csv.
# Note: this rebinds `slang`, replacing the broader frame of the same name.
has_modern_lingo = df_answers_clues['clues'].str.contains('modern lingo', regex=False)
has_modern_slang = df_answers_clues['clues'].str.contains('modern slang', regex=False)
slang = df_answers_clues[has_modern_lingo | has_modern_slang]
slang.to_csv("modern_slang.csv")

In [13]:
# Filter clues by a literal substring (regex=False); na=False treats rows
# with a missing clue as non-matching.
# NOTE(review): the search string here is empty, which matches every
# non-null clue, yet the saved output shows a single row. The original
# search text appears to have been lost -- restore it before re-running.
df_answers_clues[df_answers_clues['clues'].str.contains('', regex=False, na=False)]

Unnamed: 0,crossword_date,answers,clues,crossword_year
1118674,2005-07-10,DININGALACRATE,Supper at home before unpacking from a move?,2005
