In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
%matplotlib inline

In [None]:
plt.style.use('fivethirtyeight')

In [None]:
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

# Cleaning 

In [None]:
# Remove the two reporting-period columns; no later cell reads them.
# Reassigning instead of mutating in place, together with errors='ignore',
# keeps this cell safe to re-run: a second run would otherwise raise KeyError
# because the columns are already gone.
df = df.drop(columns=['time_period', 'time_period_type'], errors='ignore')

# 1. Show the values of data field code 'AR0005' for 'Argentina-Catamarca' over the entire duration
 

In [None]:
# Rows for the Argentina-Catamarca location restricted to data field code AR0005.
is_catamarca = df.location.eq('Argentina-Catamarca')
is_ar0005 = df.data_field_code.eq('AR0005')
df_new = df.loc[is_catamarca & is_ar0005]

# Values 

In [None]:
# Display the `value` column of the filtered rows.
df_new['value']

In [None]:
# Parse the report dates and coerce the values to numbers.
# `assign` returns a new frame, so the filtered slice of `df` is never written
# to in place; this avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
df_new = df_new.assign(
    report_date=pd.to_datetime(df_new.report_date),
    value=pd.to_numeric(df_new.value),
)

In [None]:
# Line chart of the AR0005 values against report date, labelled via the Axes
# object returned by pandas.
line_ax = df_new.plot(x='report_date', y='value')
line_ax.set_title('Data Field code AR0005 values for Argentina-Catamarca ')
line_ax.set_ylabel('Values')
line_ax.set_xlabel('Duration')
plt.show()

# 2. Plot the number of cases for 'Argentina-CABA' and 'Argentina-Catamarca' over the entire duration to identify the difference

In [None]:
# Rows for the two locations being compared, limited to measurements whose unit is 'cases'.
in_compared_locations = df.location.isin(['Argentina-CABA', 'Argentina-Catamarca'])
is_case_count = df.unit.eq('cases')
df_2 = df.loc[in_compared_locations & is_case_count]

In [None]:
# Coerce the values to numbers. `assign` returns a new frame, so the filtered
# slice of `df` is not modified in place (avoids SettingWithCopyWarning).
# NOTE(review): report_date is left unparsed in this section (the conversion
# was commented out in the original) -- presumably to keep the bar-chart tick
# labels as the raw date strings; confirm.
df_2 = df_2.assign(value=pd.to_numeric(df_2.value))

In [None]:
# Keep only the columns not listed here. The result is reassigned rather than
# dropped in place, because an in-place drop on a frame derived from a filter
# of `df` can trigger SettingWithCopyWarning.
df_2 = df_2.drop(columns=['location_type', 'data_field', 'data_field_code', 'unit'])

In [None]:
# Reshape to one row per report_date and one column per location, summing the
# remaining value column(s) within each cell.
# The aggregation is named by the string 'sum': recent pandas deprecates
# passing the Python builtin `sum` here and recommends the string to keep the
# same behaviour.
# NOTE: this overwrites `df_2` with its pivoted form, so the cell cannot be
# re-run without first re-running the cells that build `df_2`.
df_2 = df_2.pivot_table(index='report_date', columns='location', aggfunc='sum')

In [None]:
# Display the pivoted table (rows indexed by report_date, columns per location).
df_2

In [None]:
# Grouped bar chart: one group per report date, one bar per location.
fig, ax = plt.subplots(figsize=(15, 7))
ax.set_title('Values for entire duration to identify the difference')
ax.set_ylabel("Value")
df_2.plot.bar(ax=ax, rot=30)
plt.show()

# Entire Difference 

In [None]:
# Column totals over all report dates, drawn as one bar per location.
total_ax = df_2.sum(axis=0).plot.bar(color=['r', 'g'], rot=0)
total_ax.set_title('Total Difference')
total_ax.set_xlabel('Location')
total_ax.set_ylabel('Value')
total_ax.legend(loc=(1, 1))
plt.show()

# 3. Plot how data field values are distributed across the locations 'Argentina-Buenos_Aires', 'Argentina-CABA', 'Argentina-Catamarca'


In [None]:
# Rows belonging to any of the three locations analysed in this section.
df3 = df.loc[df.location.isin(
    ['Argentina-CABA', 'Argentina-Catamarca', 'Argentina-Buenos_Aires']
)]

In [None]:
# Parse the report dates and coerce the values to numbers.
# `assign` returns a new frame, so the filtered slice of `df` is never written
# to in place; this avoids SettingWithCopyWarning and keeps the cell re-runnable.
df3 = df3.assign(
    report_date=pd.to_datetime(df3.report_date),
    value=pd.to_numeric(df3.value),
)

In [None]:
# Remove the listed columns. The result is reassigned rather than dropped in
# place, because an in-place drop on a frame derived from a filter of `df` can
# trigger SettingWithCopyWarning.
df3 = df3.drop(columns=['location_type', 'data_field_code', 'unit'])

In [None]:
# Two summaries of the same rows, both summing `value`:
#   df3l -- one row per report_date, one column per location
#   df3d -- one row per location,    one column per data_field
# `values=['value']` restricts the aggregation to the numeric column. Without
# it pivot_table also tries to sum every other remaining column (the
# `data_field` strings for df3l, the `report_date` datetimes for df3d), which
# newer pandas no longer silently discards. Passing a list keeps the
# ('value', <key>) two-level column layout.
# The aggregation is named by the string 'sum' because recent pandas
# deprecates passing the builtin `sum`.
df3l = df3.pivot_table(index='report_date', columns='location',
                       values=['value'], aggfunc='sum')
df3d = df3.pivot_table(index='location', columns='data_field',
                       values=['value'], aggfunc='sum')

In [None]:
# Flatten the location x data_field table into a single row, in row-major
# order (all data fields of the first location, then the next location, ...).
# -1 lets numpy infer the length instead of hard-coding 18, so the cell keeps
# working if the number of locations or data fields changes.
vals = np.reshape(df3d.to_numpy(), (1, -1))
# Keep only the entries that are present and non-zero. NaN compares unequal to
# 0, so without the notna check a missing location/data_field combination
# would be passed on to the pie charts.
v = vals[(vals != 0) & pd.notna(vals)]

In [None]:
# Pie chart of the total value per location.
plt.figure(figsize=(7, 7))
location_totals = df3l.sum(axis=0)
# Label each wedge from the index of the series being plotted. The previous
# labels came from df3.location.unique(), which is in order of first appearance
# in the data, whereas the wedges follow the pivot's column order -- so the two
# could disagree and attach a label to the wrong wedge.
location_totals.plot(kind='pie', autopct="%1.1f%%",
                     labels=list(location_totals.index.get_level_values(-1)),
                     legend=True)
plt.ylabel("")
plt.title("Values Distributed Across Location")
plt.legend(loc=(1.5, 1))
plt.show()

In [None]:
# Pie chart of the total value per data field.
field_totals = df3d.sum(axis=0)
# Label each wedge from the index of the series being plotted. The previous
# labels came from df3.data_field.unique(), which is in order of first
# appearance in the data, whereas the wedges follow the pivot's column order --
# so the two could disagree and attach a label to the wrong wedge.
field_totals.plot(kind='pie', autopct="%1.1f%%",
                  labels=list(field_totals.index.get_level_values(-1)),
                  radius=1.6, legend=True)
plt.ylabel("")
# The enlarged radius pushes the pie beyond the axes, so the title is moved
# explicitly.
plt.title("Values Distributed Across Data-field").set_position([0.3, 1.5])
plt.legend(loc=(1.2, 1))
plt.show()

# Data Field Values Distributed Across the Locations

In [None]:
# Nested ("donut") pie chart: the outer ring shows the per-location totals
# (df3l summed over report dates) and the inner ring shows the individual
# non-zero values held in `v`.
fig, ax = plt.subplots()

# Thickness of each ring.
size = 0.7

cmap = plt.get_cmap("tab20c")
# Colormap entries 0, 4 and 8 -- one colour per location.
outer_colors = cmap(np.arange(3)*4)
# NOTE(review): eight hand-picked colormap indices, so this presumably expects
# `v` to contain eight values -- confirm against the data.
inner_colors = cmap(np.array([11,17,13,15,11,12,13,17]))
# NOTE(review): the location labels are hard-coded; they are assumed to match
# the column order of df3l -- confirm.
ax.pie(df3l.sum(axis=0), radius=3, colors=outer_colors,labels=['Argentina-Buenos_Aires','Argentina-CABA','Argentina-Catamarca'],
       wedgeprops=dict(width=size, edgecolor='w'),startangle=90)
# The title is repositioned to (0.5, 0.5) in axes coordinates, i.e. the middle
# of the axes.
plt.title("Data Field Values\n Distributed Across\n the Locations",loc='center').set_position([0.5,0.5])
# Inner ring, drawn directly inside the outer one (radius reduced by `size`).
ax.pie(v, radius=3-size, colors=inner_colors,
       wedgeprops=dict(width=size, edgecolor='w'),startangle=90)
# Legend entries are listed in the order the rings were drawn: the three
# locations first, then the inner-ring entries.
# NOTE(review): 'ccd', 'ccus', 'ccic', 'cpic' look like abbreviations of the
# data_field names and are hard-coded -- verify they match the order of `v`.
ax.legend(['Argentina-Buenos_Aires','Argentina-CABA','Argentina-Catamarca',
           'ccd','ccus','ccic','cpic','ccd','ccus','ccic','ccus'],loc=(2,1))
ax.set(aspect="equal")
plt.show()

# Location Values Distributed Across Datafields

In [None]:
# Nested ("donut") pie chart with the rings swapped relative to the previous
# figure: the outer ring shows the individual non-zero values held in `v` and
# the inner ring shows the per-location totals (df3l summed over report dates).
fig, ax = plt.subplots()

# Thickness of each ring.
size = 0.7

cmap = plt.get_cmap("tab20c")
# Colormap entries 0, 4 and 8 -- one colour per location. Despite the name,
# these colours are applied to the INNER ring in this figure.
outer_colors = cmap(np.arange(3)*4)
# Despite the name, these colours are applied to the OUTER ring in this figure.
# NOTE(review): eight hand-picked colormap indices, so this presumably expects
# `v` to contain eight values -- confirm against the data.
inner_colors = cmap(np.array([11,17,13,15,11,12,13,17]))

# The title is repositioned to (0.5, 0.5) in axes coordinates, i.e. the middle
# of the axes.
plt.title("Location Values\n Distributed Across\n Datafields",loc='center').set_position([0.5,0.5])
# Outer ring: the values in `v`.
ax.pie(v, radius=3, colors=inner_colors,
       wedgeprops=dict(width=size, edgecolor='w'),startangle=90)
# Inner ring: per-location totals, drawn directly inside the outer ring.
ax.pie(df3l.sum(axis=0), radius=3-size, colors=outer_colors,
       wedgeprops=dict(width=size, edgecolor='w'),startangle=90)
# Legend entries are listed in the order the rings were drawn: the outer-ring
# entries first, then the three locations.
# NOTE(review): 'ccd', 'ccus', 'ccic', 'cpic' look like abbreviations of the
# data_field names and are hard-coded -- verify they match the order of `v`.
ax.legend(['ccd','ccus','ccic','cpic','ccd','ccus','ccic','ccus',
          'Argentina-Buenos_Aires','Argentina-CABA','Argentina-Catamarca'],loc=(2,1))
ax.set(aspect="equal")
plt.show()

# 4. Build a program to predict the correct word for a wrongly typed word.

In [None]:
import re
from collections import Counter

def words(text):
    """Return every run of word characters in ``text``, lower-cased, in order of appearance."""
    lowered = text.lower()
    token_pattern = re.compile(r'\w+')
    return token_pattern.findall(lowered)

# Word-frequency table built from the corpus file: maps each lower-cased word
# to the number of times it occurs. The file is read inside a `with` block so
# the handle is closed as soon as the text has been loaded (the original left
# it open).
with open('big.txt') as corpus_file:
    WORDS = Counter(words(corpus_file.read()))

def P(word, N=sum(WORDS.values())):
    """Return the relative frequency of ``word`` in the corpus.

    ``N`` defaults to the total number of word occurrences in ``WORDS``; the
    default is evaluated once, when this function is defined. A word that is
    not in ``WORDS`` has a count of 0 and therefore a probability of 0.
    """
    occurrences = WORDS[word]
    return occurrences / N

def correction(word):
    """Return the candidate spelling of ``word`` that has the highest probability ``P``."""
    options = candidates(word)
    return max(options, key=P)

def candidates(word):
    """Return the possible corrections for ``word``.

    The first non-empty group wins, in this order of preference: the word
    itself if it is known, known words one edit away, known words two edits
    away, and finally the unchanged word as a single-element list.
    """
    exact = known([word])
    if exact:
        return exact

    one_edit_away = known(edits1(word))
    if one_edit_away:
        return one_edit_away

    two_edits_away = known(edits2(word))
    if two_edits_away:
        return two_edits_away

    return [word]

def known(words):
    """Return the set of items from ``words`` that are present in the ``WORDS`` table."""
    return {candidate for candidate in words if candidate in WORDS}

def edits1(word):
    """Return the set of all strings that are one simple edit away from ``word``.

    A simple edit is deleting one character, swapping two adjacent characters,
    replacing one character with a lowercase ASCII letter, or inserting one
    lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # Insertion is possible at every cut point, including the very end.
        for ch in alphabet:
            results.add(head + ch + tail)

        # The remaining edits all consume at least one character after the cut.
        if not tail:
            continue

        # Deletion of the character right after the cut.
        results.add(head + tail[1:])

        # Transposition of the two characters right after the cut.
        if len(tail) > 1:
            results.add(head + tail[1] + tail[0] + tail[2:])

        # Replacement of the character right after the cut.
        for ch in alphabet:
            results.add(head + ch + tail[1:])
    return results

def edits2(word):
    """Return a generator over all strings that are two simple edits away from ``word``.

    Results are produced lazily and may contain duplicates.
    """
    first_pass = edits1(word)
    return (twice for once in first_pass for twice in edits1(once))


# Read one word from the user and print its most probable correction.
# The input is trimmed and lower-cased first: the vocabulary is built from
# lower-cased text, so a capitalised or space-padded entry could otherwise
# never match a known word directly.
s = input().strip().lower()
print("The correct word is :", correction(s))
