# **`DataFrames practice: Working with English Words`**

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv('words.csv')

In [None]:
df.head()

Unnamed: 0,Word,Char Count,Value
0,aa,2.0,2.0
1,aah,3.0,10.0
2,aahed,5.0,19.0
3,aahing,6.0,40.0
4,aahs,4.0,29.0


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122542 entries, 0 to 122541
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Word        122542 non-null  object 
 1   Char Count  122541 non-null  float64
 2   Value       122541 non-null  float64
dtypes: float64(2), object(1)
memory usage: 2.8+ MB


In [None]:
df.shape

(122542, 3)

In [None]:
# Assuming 'Word' is the column containing 'microspectrophotometries'
# and 'Value' is another column in your DataFrame
value = df.loc[df['Word'] == 'microspectrophotometries', 'Value'].iloc[0]
print(value)

317.0


In [None]:
df.head()

Unnamed: 0,Word,Char Count,Value
0,aa,2.0,2.0
1,aah,3.0,10.0
2,aahed,5.0,19.0
3,aahing,6.0,40.0
4,aahs,4.0,29.0


In [None]:
df['Char Count'].max()

28.0

In [None]:
df.max()

Unnamed: 0,0
Word,radiotelegraphy
Char Count,28.0
Value,317.0


In [None]:
df.loc[(df['Word'] == 'pinfish') | (df['Word'] == 'glowing'), 'Value']

Unnamed: 0,Value
62695,87.0
112842,81.0


In [None]:
df.loc[df['Word'].isin(['aa', 'aah', 'aahed']), 'Value']

Unnamed: 0,Value
0,2.0
1,10.0
2,19.0


In [None]:
df.describe()

Unnamed: 0,Char Count,Value
count,122541.0,122541.0
mean,9.18451,106.232902
std,2.866319,40.280897
min,2.0,2.0
25%,7.0,77.0
50%,9.0,102.0
75%,11.0,130.0
max,28.0,317.0


In [None]:
df.sort_values(by='Value', ascending=False).head(10)

Unnamed: 0,Word,Char Count,Value
91369,microspectrophotometries,24.0,317.0
91370,microspectrophotometry,22.0,309.0
91367,microspectrophotometers,23.0,308.0
105718,overintellectualizations,24.0,307.0
72762,immunoelectrophoretically,25.0,307.0
103501,otorhinolaryngologists,22.0,307.0
30605,constitutionalizations,22.0,305.0
38163,deinstitutionalizations,23.0,305.0
111928,photophosphorylations,21.0,304.0
77273,intersubstitutabilities,23.0,303.0


In [None]:
# what is the word with the value 317
df.loc[df['Value']== 317]

Unnamed: 0,Word,Char Count,Value
91369,microspectrophotometries,24.0,317.0


In [None]:
# what is the most common value?
# df['Value'].mode()
df['Value'].value_counts().head()

Unnamed: 0_level_0,count
Value,Unnamed: 1_level_1
93.0,1344
95.0,1341
92.0,1319
88.0,1318
100.0,1291


In [None]:
# what is the shortest word with value 274
# df.loc[df['Value']==274].sort_values(by='Char Count')
df.loc[(df['Value']==274) & (df['Char Count']==20)]

Unnamed: 0,Word,Char Count,Value
106059,overprotectivenesses,20.0,274.0


In [None]:
# Create a column ratio which represents the 'Value Ratio' of a word.
df['Value Ratio'] = df['Value'] / df['Char Count']
df.head()

Unnamed: 0,Word,Char Count,Value,Value Ratio
0,aa,2.0,2.0,1.0
1,aah,3.0,10.0,3.333333
2,aahed,5.0,19.0,3.8
3,aahing,6.0,40.0,6.666667
4,aahs,4.0,29.0,7.25


In [None]:
# what is the maximum value of ratio?
# df.sort_values(by='Value Ratio', ascending=False).head()
# df.loc[df['Value Ratio']==df['Value Ratio'].max()]
df['Value Ratio'].max()

22.2

In [None]:
# How many words have a ratio of 10?
# df.loc[df['Value Ratio']==10].shape
df.query('`Value Ratio` == 10').shape

(2077, 4)

In [None]:
# What is the maximum value of the words with Ratio of 10?
# df.query('`Value Ratio` == 10').sort_values(by='Char Count', ascending=False).head(1)
df.loc[df['Value Ratio']==10, 'Value'].max()

240.0

In [121]:
# Of those words with a Value of 260, what is the  lowest Char Count found?
df.loc[df['Value']==260, 'Char Count'].min()
# df.query('Value==260').sort_values(by='Char Count').head(1)

17.0

In [128]:
# Based on the previous task, what word is it?
df.query('Value==260').sort_values(by='Char Count').head(1)['Word']

Unnamed: 0,Word
71128,hydroxytryptamine


In [129]:
# Number of Elements in DataFrame?
df.shape

(122542, 4)

In [130]:
# What is the Value of 'microspectrophotometries'?
df.loc[df['Word']=='microspectrophotometries', 'Value']

Unnamed: 0,Value
91369,317.0


In [131]:
# What is the highest value in the DataFrame?
df['Value'].max()

317.0

In [139]:
# Which word has a value of 317?
df.query('Value==317')['Word']

Unnamed: 0,Word
91369,microspectrophotometries


In [140]:
# What is the highest possible length of a word?
df['Char Count'].max()

28.0

In [143]:
# What is the shortest word with a value of 274?
df.query('Value==274').sort_values(by='Char Count').head(1)['Word']

Unnamed: 0,Word
106059,overprotectivenesses


In [151]:
# How many words are there with 7 characters and a value of 87?
df.query('`Char Count`==7 & Value==87')['Word'].count()

340

In [154]:
# What are the words with a value of 100?
df.query('Value==100')['Word'].head()

Unnamed: 0,Word
68,abatements
169,abettors
437,absconders
498,absorbancy
703,accelerandos


In [159]:
# What is the most common value?
df['Value'].value_counts().head(1)

Unnamed: 0_level_0,count
Value,Unnamed: 1_level_1
93.0,1344


In [167]:
# What is the word with the highest ratio, and what is its value?
df.sort_values(by='Value Ratio', ascending=False).head(1)[['Word', 'Value']]

Unnamed: 0,Word,Value
96302,muzzy,111.0


In [171]:
# What is the word with a value of 260 and the lowest character count?
df.query('Value==260').sort_values(by='Char Count').min()[['Word', 'Char Count', 'Value Ratio']]

Unnamed: 0,0
Word,countermobilizations
Char Count,17.0
Value Ratio,13.0
