In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import re
from path import Path
from langdetect import detect, detect_langs
from nltk.stem.snowball import SnowballStemmer

In [2]:
# Create DataFrame from CSV file
music_df = pd.read_csv('../../preliminary_dataframes/nlp_df.csv')
music_df = music_df.drop(columns=['Unnamed: 0', 'hashedValues', 'features'])
music_df.head()

Unnamed: 0,song,artist,category,lyrics,words,filtered
0,Monster (Shawn Mendes & Justin Bieber),Shawn Mendes,34,You put me on a pedestal and tell me Im the b...,"['', 'you', 'put', 'me', 'on', 'a', 'pedestal'...","['', 'put', 'pedestal', 'tell', 'im', 'best', ..."
1,positions,Ariana Grande,34,Heaven sent you to me Im just hopin I dont re...,"['', 'heaven', 'sent', 'you', 'to', 'me', 'im'...","['', 'heaven', 'sent', 'im', 'hopin', 'dont', ..."
2,Therefore I Am,Billie Eilish,34,Im not your friend Or anything damn You think...,"['', 'im', 'not', 'your', 'friend', 'or', 'any...","['', 'im', 'friend', 'anything', 'damn', 'thin..."
3,Levitating (feat. DaBaby),Dua Lipa,34,Billboard Baby Dua Lipa los hace bailar cuand...,"['', 'billboard', 'baby', 'dua', 'lipa', 'los'...","['', 'billboard', 'baby', 'dua', 'lipa', 'los'..."
4,Dakiti,Bad Bunny,34,Baby ya yo me enteré se nota cuando me ve Ahí...,"['', 'baby', 'ya', 'yo', 'me', 'enteré', 'se',...","['', 'baby', 'ya', 'yo', 'enteré', 'se', 'nota..."


In [4]:
# Create a list of languages and filtered words
languages = []
filtered_words_list = []
unique_word_count = []
for index, row in music_df.iterrows():
    filtered_words = music_df['filtered'][index]
    filtered_words = filtered_words.replace(',', '').replace('.', '').replace('`', '')
    filtered_words = filtered_words.replace('[', '').replace(']', '').replace('{', '').replace('}', '')
    filtered_words = filtered_words.replace('!', '').replace("'", '').replace('"', '').replace('”', '')
    filtered_words = filtered_words.replace(':', '').replace(';', '').replace('*', '')
    filtered_words = filtered_words.replace('“', '').replace('_', '').replace('—', '').replace('–', ' ')
    filtered_words = filtered_words.replace('\\u2005', ' ').replace('¨', '')
    filtered_words = filtered_words.replace('‘', '')
    filtered_words = filtered_words.replace('=', ' ').replace('·', '')
    while ('\\u200e' in filtered_words) or ('\\xa0' in filtered_words):
        filtered_words = filtered_words.replace('\\u200e', '')
        filtered_words = filtered_words.replace('\\xa0', '')
    filtered_words_list.append(filtered_words)
    try:
        lang = detect_langs(filtered_words)
        if len(lang) == 1:
            languages.append(str(lang[0])[0:2])
        else: 
            languages.append(np.nan)
    except:
        languages.append(np.nan)
    unique_words = list(set(filtered_words.strip().split(' ')))
    unique_word_count.append(len(unique_words))

In [5]:
# Add a language column to the DataFrame and replace the filtered column
music_df['language'] = languages
music_df['filtered'] = filtered_words_list
music_df['unique_words'] = unique_word_count
music_df

Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words
0,Monster (Shawn Mendes & Justin Bieber),Shawn Mendes,34,You put me on a pedestal and tell me Im the b...,"['', 'you', 'put', 'me', 'on', 'a', 'pedestal'...",put pedestal tell im best raise sky im short ...,en,82
1,positions,Ariana Grande,34,Heaven sent you to me Im just hopin I dont re...,"['', 'heaven', 'sent', 'you', 'to', 'me', 'im'...",heaven sent im hopin dont repeat history boy ...,en,57
2,Therefore I Am,Billie Eilish,34,Im not your friend Or anything damn You think...,"['', 'im', 'not', 'your', 'friend', 'or', 'any...",im friend anything damn think youre man think...,en,67
3,Levitating (feat. DaBaby),Dua Lipa,34,Billboard Baby Dua Lipa los hace bailar cuand...,"['', 'billboard', 'baby', 'dua', 'lipa', 'los'...",billboard baby dua lipa los hace bailar cuand...,es,194
4,Dakiti,Bad Bunny,34,Baby ya yo me enteré se nota cuando me ve Ahí...,"['', 'baby', 'ya', 'yo', 'me', 'enteré', 'se',...",baby ya yo enteré se nota cuando ve ahí donde...,es,137
...,...,...,...,...,...,...,...,...
725,Better,BoA,21,그만 거기서 한 걸음만 뒤를 돌아보지 말고 걸어와 넌 나를 믿고 그냥 걸어봐 위험...,"['', '그만', '거기서', '한', '걸음만', '뒤를', '돌아보지', '말...",그만 거기서 한 걸음만 뒤를 돌아보지 말고 걸어와 넌 나를 믿고 그냥 걸어봐 위험...,ko,159
726,LAST PIECE,GOT7,21,이젠 알아 네가 없어진 나란 절대 있을 수 없단 걸 텅 빈 인형일 뿐인 걸 이젠 ...,"['', '이젠', '알아', '네가', '없어진', '나란', '절대', '있을'...",이젠 알아 네가 없어진 나란 절대 있을 수 없단 걸 텅 빈 인형일 뿐인 걸 이젠 ...,,123
727,Black Mamba,aespa,21,Hey Hey Im addicted 끊임없이 말을 걸어주는 나의 aespa Oh ...,"['', 'hey', 'hey', 'im', 'addicted', '끊임없이', '...",hey hey im addicted 끊임없이 말을 걸어주는 나의 aespa oh ...,,100
728,90's Love,NCT U,21,""" Hey hey hey hey hey hey ho ho Hey hey hey he...","['""', 'hey', 'hey', 'hey', 'hey', 'hey', 'hey'...",hey hey hey hey hey hey ho ho hey hey hey hey...,en,160


In [6]:
# Remove songs that do not have english lyrics
for index, row in music_df.iterrows():
    if row['language'] != 'en':
        music_df = music_df.drop(index)
music_df

Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words
0,Monster (Shawn Mendes & Justin Bieber),Shawn Mendes,34,You put me on a pedestal and tell me Im the b...,"['', 'you', 'put', 'me', 'on', 'a', 'pedestal'...",put pedestal tell im best raise sky im short ...,en,82
1,positions,Ariana Grande,34,Heaven sent you to me Im just hopin I dont re...,"['', 'heaven', 'sent', 'you', 'to', 'me', 'im'...",heaven sent im hopin dont repeat history boy ...,en,57
2,Therefore I Am,Billie Eilish,34,Im not your friend Or anything damn You think...,"['', 'im', 'not', 'your', 'friend', 'or', 'any...",im friend anything damn think youre man think...,en,67
5,Errbody,Lil Baby,34,Flyer than everybody Section 8 just straight ...,"['', 'flyer', 'than', 'everybody', 'section', ...",flyer everybody section 8 straight cooked muh...,en,288
6,Whoopty,CJ,34,Loyalty over royalty yall niggas know the vib...,"['', 'loyalty', 'over', 'royalty', 'yall', 'ni...",loyalty royalty yall niggas know vibes pxcoyo...,en,101
...,...,...,...,...,...,...,...,...
706,CALL ME BABY,EXO,21,이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 Like boom ...,"['이', '거리는', '완전', '난리야', '사람들', '사이는', '남이야',...",이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 like boom ...,en,242
714,Breath,GOT7,21,I cant breath Cant you see Let me breath Ba...,"['', 'i', 'cant', 'breath', '', 'cant', 'you',...",cant breath cant see let breath baby set fr...,en,18
722,Dream of You (with R3HAB),CHUNG HA,21,YO VISTO ASÍ Bad Bunny Angels Like You Mil...,"['yo', 'visto', 'así', '', '', 'bad', 'bunny',...",yo visto así bad bunny angels like miley c...,en,328
723,90's Love,NCT U,21,""" Hey hey hey hey hey hey ho ho Hey hey hey he...","['""', 'hey', 'hey', 'hey', 'hey', 'hey', 'hey'...",hey hey hey hey hey hey ho ho hey hey hey hey...,en,160


In [7]:
# Remove songs with less than 50 unique words
music_df = music_df[music_df['unique_words'] > 50]
music_df

Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words
0,Monster (Shawn Mendes & Justin Bieber),Shawn Mendes,34,You put me on a pedestal and tell me Im the b...,"['', 'you', 'put', 'me', 'on', 'a', 'pedestal'...",put pedestal tell im best raise sky im short ...,en,82
1,positions,Ariana Grande,34,Heaven sent you to me Im just hopin I dont re...,"['', 'heaven', 'sent', 'you', 'to', 'me', 'im'...",heaven sent im hopin dont repeat history boy ...,en,57
2,Therefore I Am,Billie Eilish,34,Im not your friend Or anything damn You think...,"['', 'im', 'not', 'your', 'friend', 'or', 'any...",im friend anything damn think youre man think...,en,67
5,Errbody,Lil Baby,34,Flyer than everybody Section 8 just straight ...,"['', 'flyer', 'than', 'everybody', 'section', ...",flyer everybody section 8 straight cooked muh...,en,288
6,Whoopty,CJ,34,Loyalty over royalty yall niggas know the vib...,"['', 'loyalty', 'over', 'royalty', 'yall', 'ni...",loyalty royalty yall niggas know vibes pxcoyo...,en,101
...,...,...,...,...,...,...,...,...
700,Dedicated To J Dilla,Linn Mori,35,"""BEHOLD! My """"Walls Of Worship"""": MARK I MARK ...","['""behold!', 'my', '""""walls', 'of', 'worship""""...",behold walls worship mark mark ii mark iii a$a...,en,1078
706,CALL ME BABY,EXO,21,이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 Like boom ...,"['이', '거리는', '완전', '난리야', '사람들', '사이는', '남이야',...",이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 like boom ...,en,242
722,Dream of You (with R3HAB),CHUNG HA,21,YO VISTO ASÍ Bad Bunny Angels Like You Mil...,"['yo', 'visto', 'así', '', '', 'bad', 'bunny',...",yo visto así bad bunny angels like miley c...,en,328
723,90's Love,NCT U,21,""" Hey hey hey hey hey hey ho ho Hey hey hey he...","['""', 'hey', 'hey', 'hey', 'hey', 'hey', 'hey'...",hey hey hey hey hey hey ho ho hey hey hey hey...,en,160


In [11]:
# Add a column for English word count
english_word_counts = []
for index, row in music_df.iterrows():
    english_count = 0 
    filtered_words = row['filtered']
    for word in list(set(filtered_words.strip().split(' '))):
        try: 
            lang = detect(word)
            if lang == 'en':
                english_count+=1
        except: 
            print(word)
    english_word_counts.append(english_count)
music_df['english_words'] = english_word_counts
music_df

12
8
42
2020
12
2020


1
7/2
7/5
4
7/20
7/23
070
7/14
182
1999
7/24
4000
7/1
30000
｡
7/22
7/7
｡ﾟ+
7/29
7/28
2%
7/11
7/30
2
2020
7/27
7/25
7/10
5
99
7/19
7/3
7/17
7/21
7/13
/
7/15
12
23
500
7/8
7/31
8
#9
7/9
7/4
20
03
7/12
90
7
40
&
7/6
7/16
69
45
999


9
125000
89
193
80
$28310765
38
67
222
223
141
178
204
216
210
119
126
1907
65
15
59
76
85
5
124
127
50
171
45
219
226
66
17
41
23
79
109
800000
91
160
104
1980
184
36
34
181
95
120
116
47
96
75
$10000
97
27
90
137
122
78
#3
147
218
108
29
18
30
60
$1500
46
177
180
11
54
203
197
51
24
198
161
107
176
6
153
186
149
150
#1
43
115
213
189
165
130
99
201
1100
220
194
103
132
113
158
185
145
$5
12
111
2846
157
102
105
162
192
39
135
146
77
129
94
195
118
214
58
175
142
$100000
72
33
117
57
206
134
191
183
32
221
202
28
200
$100
148
21
19
215
71
13
173
159
168
170
125
128
1981
92
70
48
188
35
73
143
196
44
42
25
250
140
174
31
163
8
$500
224
3
139
25000
112
$800
56
16
98
20
131
7
93
63
68
7000
207
49
1
164
110
136
166
4
187
#2
88
86
169
26
182

2
64
20%

1
…
2008
22
&

1830
104
####
1

4500
9
1882
385
119
241
5
1590
317
219
226
62282
1883
269
97
227
90
522
29
18
1872
451
60
46
519
51
305
5000
176
6
189
1807
158
347
340
77
249
94
484
62
57
206
183
221
1880
275
1803
348
70
285
290
143
42
1572
25
1846
8
224
3
7½
25000
365
98
20
346
7
93
1852
1
264
4
84
123
279
14
151
138
2
281
133
308
208
230
364
22
527
/
52
242
267
10
331
396

30
333

3714
3568
3336
3868
9000
3055
3041
3128
11424
1
3872
3672
3225
3755
3618
3912
3588
3017
3489
3159
3160
3929
3201
3439
3372
3716
3845
3093
3203
3975
3015
3355
3277
3848
3747
3523
3723
3326
3259
3176
3448
3554
10000000
3290
3761
3127
3762
3349
3457
3364
3732
3864
3254
3158
3970
3924
™
3305
3930
3794
3936
3101
3452
3288
3236
3394
3673
3855
3147
3182
3331
3354
3626
3478
3928
3751
3306
997
3680
3995
3074
3842
3947
3390
3722
3150
3021
3905
3138
3362
3833
3295
3142
3219
3854
3375
3959
3031
3171
3088
3062
3736
3092
3054
3301
3822
3069
3746
3955
3366
3323
3734
3706
3510
3373
3458
3360
3910
3374
3222
¢10
32

2568
23
1842
2048
923
1862
2387
1150
1710
1521
1984
2753
2207
1957
337
2118
074
2303
1835
341
608
2630
1239
1950
2483
051
852
1415
1027
615
2071
2803
109
1221
1181
358
2497
889
596
993
1368
1606
1517
1995
801
2835
2394
1009
494
1186
1883
91
#5
160
1830
2797
328
1706
815
1179
1324
2045
2731
1062
1865
2536
1793
319
1718
906
323
2449
1328
339
1603
1987
682
1293
104
1090
1512
1754
2964
060
485
1980
831
2110
2290
2517
184
1707
941
049
2788
2574
1249
909
2075
251
2861
2839
1340
2161
1022
1528
1735
1417
1151
1290
#31
013
2338
2006
706
1055
2476
2165
3999
#1812
2875
2128
666
2241
1235
607
235
2670
2756
407
2596
961
755
600
369
1614
2833
1230
1833
819
1737
2444
████
455
1381
084
1038
438
434
1674
1506
181
1110
1607
2616
1405
904
120
2928
610
2383
928
1176
116
2488
1667
058
2730
296
1901
1640
269
2024
1991
2800
1820
2647
2076
2940
001
2256
1492
2461
2272
1648
2478
2966
2995
273
2385
1171
1353
2789
1534
1344
2828
591
2542
534
463
656
2499
1850
897
1897
951
227
415
2163
2943
990
1071
356
2891
045


2811
758
031
2350
2633
2326
861
1990
1480
2352
1203
1030
032
557
1455
2354
2945
2671
044
2643
822
2260
2772
1088
1359
1269
885
1345
1685
&
2987
2134
2529
2778
255
1391
2598
1499
2390
2976
777
461
2950
207
1016
325
2603
1852
1920
2892
383
2039
2247
1058
2706
458
1159
1388
1019
2850
1677
835
511
229
1
2970
1925
164
665
2680
2221
1040
1089
1959
1708
2340
110
2154
349
433
1500
1484
2389
435
575
744
624
092
2677
1213
418
469
1948
2538
2435
355
748
1968
2746
1976
1236
1460
2487
2967
2149
1336
2760
2153
2199
336
1858
1278
910
2508
2171
2500
2201
2328
1663
1153
2056
948
073
459
2698
2074
1429
2469
1450
1193
668
1855
1942
1189
1526
2102
370
1592
2878
1355
1032
896
1868
2393
1464
1877
930
1580
252
736
1779
136
712
1109
2378
2562
546
166
2953
1082
2708
1397
937
2252
472
264
2936
1692
2683
4
499
2391
1477
1510
2447
568
454
2525
1628
2106
1896
628
1096
1916
2188
2311
862
2329
1943
1106
1759
1812
2533
564
2029
1250
716
2852
1233
664
1985
2786
329
1113
1893
1273
2663
1211
187
2233
737
$҉
410
536
155


363
394
100
167
578
81
52
530
242
04
750
267
1994
03
310
10
37

1
90059
1998
1996
2209
444
72
9
1982
4/4
360
136
4
60
1985
1983
11
0
86
1999
21
316
19
2019
508
456
1993
101
13
2013
2011
87
74
2003
2005
82
+
31520
1986
1498
6
151
1981
2002
15
2
59
2020
507
64
48
2015
133
85
5
1017
2016
50
73
2000
1992
/
12
23
500
1984
25
2014
100
1997
1995
105
1987
1980
1988
8
2006
2/30
2010
36
2009
2008
3
2017
2012
2007
1991
121
@
16
3030
1994
2001
20
27
1989
10
1990
7
40
93
06
2004
&
7000
2018
1/15

680
314
9
360
38
67
222
33000
223
282
65
256
23
79
36
34
27
1879
243
11
24
380
153
1385
115
213
400
220
194
185
12
683
162
192
33
32
200
80000
168
70
48
237
692
140
1885
163
8
332
3
1831
20
7
68
1
748
4
26
50000
1326
144
69
321
2
64
449
133
106
55
22
10000
300
10
40

37
38
7
3
18
15
40
22
24
12
16
5
50
33
1
2
51
21
35
59
11
13
43
9
54
23
58
57
19
&
2017
39
55
14
8
25
31
60
6
20
32
27
36
070
45
46
44
26
30
48
53
56
4
17
29
47
52
41
49
10
42
28
34


12
5
1
0
9
8
808
10
1
2
100
75

915

21
4
2
&
0122
5


18
3

10
40

1
78
1998
1977
83655
110
1996
58
1971
1948
1924
1976
29
53%
72
18
33
9
1982
57
61
136
60%
13970
30
4
1978
34%
80
60
32
1985
46
100000
40000
28
38
1983
239
11
0
10%
29%
200
54
20081105
67
275
26
53
1999
750000
51
21
19
123
1993
71
24
13
173
$23
5188
$32
69
309250
74
2003
2005
82
1300
14
1986
6
58000
6000
65
2002
150
15
2
59
46%
43
70
281
19%
73300
64
76
48
320
5
188
30%
35
55
45350
3000
103
700
22
50
300%
73
2000
9/11
02
45
219
44
66
1956
17
200655
41
42
12
20000
23
1701
844
1984
25
$30
10000
1000
100
7700
1009
31
450
105
1212041462265
11680
1988
25%
8
2006
80%
36
52
2009
34
2008
3
112
181
1184
39
20090220
1967
2007
242
14%
1947
56
47
77
75
16
1994
$21
2001
20
27
270
22122008
227
10
37
7
40
62
2004
63
&
68
1974
461
2008/9
49

9
5
11
6
…
12
28
3/9
8
3
7
4
2
106
5688
10

9
5
11
6
…
12
28
3/9
8
3
7
4
2
106
5688
10
3
1
2

9
5
11
6
…
12
28
3/9
8
3
7
4
2
106
5688
10

9
5
11
6
…
12
28
3/9
8
3
7
4
2
106
5688
10

°
#
32
@
&
ville@in
1
4
free@odc
10
4444

1
1998
1972
1996
435
08
444
18
194

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words,word_stems,english_words
0,Monster (Shawn Mendes & Justin Bieber),Shawn Mendes,34,You put me on a pedestal and tell me Im the b...,"['', 'you', 'put', 'me', 'on', 'a', 'pedestal'...",put pedestal tell im best raise sky im short ...,en,82,"[put, pedest, tell, im, best, rais, sky, im, s...",18
1,positions,Ariana Grande,34,Heaven sent you to me Im just hopin I dont re...,"['', 'heaven', 'sent', 'you', 'to', 'me', 'im'...",heaven sent im hopin dont repeat history boy ...,en,57,"[heaven, sent, im, hopin, dont, repeat, histor...",6
2,Therefore I Am,Billie Eilish,34,Im not your friend Or anything damn You think...,"['', 'im', 'not', 'your', 'friend', 'or', 'any...",im friend anything damn think youre man think...,en,67,"[im, friend, anyth, damn, think, your, man, th...",14
5,Errbody,Lil Baby,34,Flyer than everybody Section 8 just straight ...,"['', 'flyer', 'than', 'everybody', 'section', ...",flyer everybody section 8 straight cooked muh...,en,288,"[flyer, everybodi, section, 8, straight, cook,...",54
6,Whoopty,CJ,34,Loyalty over royalty yall niggas know the vib...,"['', 'loyalty', 'over', 'royalty', 'yall', 'ni...",loyalty royalty yall niggas know vibes pxcoyo...,en,101,"[loyalti, royalti, yall, nigga, know, vibe, px...",18
...,...,...,...,...,...,...,...,...,...,...
700,Dedicated To J Dilla,Linn Mori,35,"""BEHOLD! My """"Walls Of Worship"""": MARK I MARK ...","['""behold!', 'my', '""""walls', 'of', 'worship""""...",behold walls worship mark mark ii mark iii a$a...,en,1078,"[behold, wall, worship, mark, mark, ii, mark, ...",158
706,CALL ME BABY,EXO,21,이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 Like boom ...,"['이', '거리는', '완전', '난리야', '사람들', '사이는', '남이야',...",이 거리는 완전 난리야 사람들 사이는 남이야 함께하는 매 순간이 like boom ...,en,242,"[이, 거리는, 완전, 난리야, 사람들, 사이는, 남이야, 함께하는, 매, 순간이,...",19
722,Dream of You (with R3HAB),CHUNG HA,21,YO VISTO ASÍ Bad Bunny Angels Like You Mil...,"['yo', 'visto', 'así', '', '', 'bad', 'bunny',...",yo visto así bad bunny angels like miley c...,en,328,"[yo, visto, así, bad, bunni, angel, like, mile...",39
723,90's Love,NCT U,21,""" Hey hey hey hey hey hey ho ho Hey hey hey he...","['""', 'hey', 'hey', 'hey', 'hey', 'hey', 'hey'...",hey hey hey hey hey hey ho ho hey hey hey hey...,en,160,"[hey, hey, hey, hey, hey, hey, ho, ho, hey, he...",4


In [14]:
# Remove songs with less than 50 unique English words
music_df = music_df[music_df['english_words']>50]
music_df

Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words,word_stems,english_words
5,Errbody,Lil Baby,34,Flyer than everybody Section 8 just straight ...,"['', 'flyer', 'than', 'everybody', 'section', ...",flyer everybody section 8 straight cooked muh...,en,288,"[flyer, everybodi, section, 8, straight, cook,...",54
33,Come & Go (with Marshmello),Juice WRLD,0,"""7/1 A Boogie Wit Da Hoodie & Don Q """"Flood ...","['""7/1', 'a', 'boogie', 'wit', 'da', 'hoodie',...",7/1 boogie wit da hoodie & q flood wrist ft ...,en,1668,"[7/1, boogi, wit, da, hoodi, &, q, flood, wris...",184
56,Another Day in Paradise - 2016 Remaster,Phil Collins,3,"""Scarface By: Oliver Stone """"Enjoy yourself ...","['""scarface', 'by:', 'oliver', 'stone', '""""enj...",scarface by oliver stone enjoy every day gr...,en,4992,"[scarfac, by, oliv, stone, enjoy, everi, day, ...",938
61,Don't Leave Me Now,Lost Frequencies,3,"""Scene 08: Timber & The Owls Scene 09: The TV ...","['""scene', '08:', 'timber', '&', 'the', 'owls'...",scene 08 timber & owls scene 09 tv studio tick...,en,1133,"[scene, 08, timber, &, owl, scene, 09, tv, stu...",238
65,Crush,braj mahal,3,"""Last updated: 10/6/2018 5:16PM MST OctoberOct...","['""last', 'updated:', '10/6/2018', '5:16pm', '...",last updated 10/6/2018 516pm mst octoberoctobe...,en,4193,"[last, updat, 10/6/2018, 516pm, mst, octoberoc...",516
...,...,...,...,...,...,...,...,...,...,...
685,Phone Yam,Alfa Kat,1,""" I spend time in the hood Martin luther king ...","['""', 'i', 'spend', 'time', 'in', 'the', 'hood...",spend time hood martin luther king mansion cr...,en,6084,"[spend, time, hood, martin, luther, king, mans...",870
687,What Is Life - Remastered 2014,George Harrison,35,"""Last updated: 10/6/2018 5:16PM MST OctoberOct...","['""last', 'updated:', '10/6/2018', '5:16pm', '...",last updated 10/6/2018 516pm mst octoberoctobe...,en,4193,"[last, updat, 10/6/2018, 516pm, mst, octoberoc...",508
689,Band On The Run - Remastered 2010,Wings,35,""" – Ayo introduce yourself – Slava KPSS – Yeee...","['""', '–', 'ayo', 'introduce', 'yourself', '–'...",ayo introduce slava kpss yeeea slavik w...,en,2089,"[ayo, introduc, slava, kpss, yeeea, slavik, wh...",367
693,Trapdoor,Gorgon City,35,"""FADE IN: EXT. CITYSCAPE NIGHT Gotham City. ...","['""fade', 'in:', 'ext.', 'cityscape', '', '', ...",fade in ext cityscape night gotham city city...,en,4286,"[fade, in, ext, cityscap, night, gotham, citi,...",854


In [16]:
# Add column of word stems
word_stems = []
stemmer = SnowballStemmer("english")
for index, row in music_df.iterrows():
    stems = []
    filtered_words = row['filtered']
    for word in filtered_words.split():
        stems.append(stemmer.stem(word))
    word_stems.append(stems)
music_df['word_stems'] = word_stems
music_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,song,artist,category,lyrics,words,filtered,language,unique_words,word_stems,english_words
5,Errbody,Lil Baby,34,Flyer than everybody Section 8 just straight ...,"['', 'flyer', 'than', 'everybody', 'section', ...",flyer everybody section 8 straight cooked muh...,en,288,"[flyer, everybodi, section, 8, straight, cook,...",54
33,Come & Go (with Marshmello),Juice WRLD,0,"""7/1 A Boogie Wit Da Hoodie & Don Q """"Flood ...","['""7/1', 'a', 'boogie', 'wit', 'da', 'hoodie',...",7/1 boogie wit da hoodie & q flood wrist ft ...,en,1668,"[7/1, boogi, wit, da, hoodi, &, q, flood, wris...",184
56,Another Day in Paradise - 2016 Remaster,Phil Collins,3,"""Scarface By: Oliver Stone """"Enjoy yourself ...","['""scarface', 'by:', 'oliver', 'stone', '""""enj...",scarface by oliver stone enjoy every day gr...,en,4992,"[scarfac, by, oliv, stone, enjoy, everi, day, ...",938
61,Don't Leave Me Now,Lost Frequencies,3,"""Scene 08: Timber & The Owls Scene 09: The TV ...","['""scene', '08:', 'timber', '&', 'the', 'owls'...",scene 08 timber & owls scene 09 tv studio tick...,en,1133,"[scene, 08, timber, &, owl, scene, 09, tv, stu...",238
65,Crush,braj mahal,3,"""Last updated: 10/6/2018 5:16PM MST OctoberOct...","['""last', 'updated:', '10/6/2018', '5:16pm', '...",last updated 10/6/2018 516pm mst octoberoctobe...,en,4193,"[last, updat, 10/6/2018, 516pm, mst, octoberoc...",516
...,...,...,...,...,...,...,...,...,...,...
685,Phone Yam,Alfa Kat,1,""" I spend time in the hood Martin luther king ...","['""', 'i', 'spend', 'time', 'in', 'the', 'hood...",spend time hood martin luther king mansion cr...,en,6084,"[spend, time, hood, martin, luther, king, mans...",870
687,What Is Life - Remastered 2014,George Harrison,35,"""Last updated: 10/6/2018 5:16PM MST OctoberOct...","['""last', 'updated:', '10/6/2018', '5:16pm', '...",last updated 10/6/2018 516pm mst octoberoctobe...,en,4193,"[last, updat, 10/6/2018, 516pm, mst, octoberoc...",508
689,Band On The Run - Remastered 2010,Wings,35,""" – Ayo introduce yourself – Slava KPSS – Yeee...","['""', '–', 'ayo', 'introduce', 'yourself', '–'...",ayo introduce slava kpss yeeea slavik w...,en,2089,"[ayo, introduc, slava, kpss, yeeea, slavik, wh...",367
693,Trapdoor,Gorgon City,35,"""FADE IN: EXT. CITYSCAPE NIGHT Gotham City. ...","['""fade', 'in:', 'ext.', 'cityscape', '', '', ...",fade in ext cityscape night gotham city city...,en,4286,"[fade, in, ext, cityscap, night, gotham, citi,...",854


In [18]:
# Create list of unique word stems
stems_list = []
for index, row in music_df.iterrows():
    stems = row['word_stems']
    unique_stems = list(set(stems))
    stems_list.extend(unique_stems)
stem_columns = list(set(stems_list))
len(stem_columns)

61693