### Installing Pandas Module

In [1]:
# python -m pip install pandas
# pip install pandas
# conda install pandas

### Loading Modules

In [135]:
import pandas as pd
import numpy as np
import random
import string
alphabet = string.ascii_uppercase
alphabet
datasets = ['uforeports','drinksbycountry','imdbratings']
for d in datasets:
    df = pd.read_csv(f'http://bit.ly/{d}')
    df.to_csv(f'data/{d}.csv')
    

### Pandas Version

In [3]:
pd.__version__

'1.3.4'

In [4]:
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5
python           : 3.9.7.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.22000
machine          : AMD64
processor        : Intel64 Family 6 Model 126 Stepping 5, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : en_GB.UTF-8
LOCALE           : English_Canada.1252

pandas           : 1.3.4
numpy            : 1.20.3
pytz             : 2021.3
dateutil         : 2.8.1
pip              : 21.2.4
setuptools       : 58.0.4
Cython           : 0.29.24
pytest           : 6.2.4
hypothesis       : None
sphinx           : 4.2.0
blosc            : None
feather          : None
xlsxwriter       : 3.0.1
lxml.etree       : 4.6.3
html5lib         : 1.1
pymysql          : None
psycopg2         : None
jinja2           : 2.11.3
IPython          : 7.20.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2

## Reading/Writing Data

In [5]:
dir(pd)

['BooleanDtype',
 'Categorical',
 'CategoricalDtype',
 'CategoricalIndex',
 'DataFrame',
 'DateOffset',
 'DatetimeIndex',
 'DatetimeTZDtype',
 'ExcelFile',
 'ExcelWriter',
 'Flags',
 'Float32Dtype',
 'Float64Dtype',
 'Float64Index',
 'Grouper',
 'HDFStore',
 'Index',
 'IndexSlice',
 'Int16Dtype',
 'Int32Dtype',
 'Int64Dtype',
 'Int64Index',
 'Int8Dtype',
 'Interval',
 'IntervalDtype',
 'IntervalIndex',
 'MultiIndex',
 'NA',
 'NaT',
 'NamedAgg',
 'Period',
 'PeriodDtype',
 'PeriodIndex',
 'RangeIndex',
 'Series',
 'SparseDtype',
 'StringDtype',
 'Timedelta',
 'TimedeltaIndex',
 'Timestamp',
 'UInt16Dtype',
 'UInt32Dtype',
 'UInt64Dtype',
 'UInt64Index',
 'UInt8Dtype',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__docformat__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_config',
 '_hashtable',
 '_is_numpy_dev',
 '_lib',
 '_libs',
 '_np_version_under1p18',
 '_testing',
 '_tslib',
 '_typing',
 

In [6]:
[x for x in dir(pd) if x.startswith('read')]

['read_clipboard',
 'read_csv',
 'read_excel',
 'read_feather',
 'read_fwf',
 'read_gbq',
 'read_hdf',
 'read_html',
 'read_json',
 'read_orc',
 'read_parquet',
 'read_pickle',
 'read_sas',
 'read_spss',
 'read_sql',
 'read_sql_query',
 'read_sql_table',
 'read_stata',
 'read_table',
 'read_xml']

In [7]:
[x for x in dir(pd) if x.startswith('to')]

['to_datetime', 'to_numeric', 'to_pickle', 'to_timedelta']

### Pandas Series

In [8]:
import pandas as pd
alphabet = string.ascii_lowercase
print(alphabet)
series = pd.Series(data=[1,2,3])
print(series)

abcdefghijklmnopqrstuvwxyz
0    1
1    2
2    3
dtype: int64


In [9]:
print(f'The index is {series.index}')
print(f'The values are {series.values}')

The index is RangeIndex(start=0, stop=3, step=1)
The values are [1 2 3]


In [10]:
series = pd.Series(data=[1,2,3],index=list('abc'),\
                   dtype='uint8')
print(series)

a    1
b    2
c    3
dtype: uint8


In [11]:
series = pd.Series(data=[1,2,3],index=list('abc'),\
                   dtype='uint8')
print(f'The index is {series.index}')
print(f'The values are {series.values}')

The index is Index(['a', 'b', 'c'], dtype='object')
The values are [1 2 3]


In [12]:
print(f"series['a':'b']={series['a':'b']}")

series['a':'b']=a    1
b    2
dtype: uint8


In [13]:
print(f'series[1:2]={series[1:2]}')

series[1:2]=b    2
dtype: uint8


In [14]:
series = pd.Series(data=[x for x in range(1,11)],index=list(alphabet[0:10]),\
                   dtype='uint8')
series

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
dtype: uint8

In [15]:
alphabet='abcdefghijklmnopqrstuvwxyz'
series = pd.Series([x for x in range(1,11)],\
                    index=list(alphabet[0:10]),\
                    dtype='uint8')
series['d':'i'] = 255
series[-1] = 100
series

a      1
b      2
c      3
d    255
e    255
f    255
g    255
h    255
i    255
j    100
dtype: uint8

In [16]:
alphabet='abcdefghijklmnopqrstuvwxyz'
series = pd.Series([x for x in range(1,11)],\
                    index=list(alphabet[0:10]),\
                    dtype='uint8')
series[1:5] = 255
series[[-1,-3]] = 100
series

a      1
b    255
c    255
d    255
e    255
f      6
g      7
h    100
i      9
j    100
dtype: uint8

In [17]:
import random as r
series = pd.Series(data=[r.random() for i in range(10)])
series

0    0.508598
1    0.532788
2    0.379401
3    0.443597
4    0.196052
5    0.634674
6    0.622410
7    0.483815
8    0.213608
9    0.544043
dtype: float64

In [18]:
x=pd.Series(['This',False,23,2.3])
x

0     This
1    False
2       23
3      2.3
dtype: object

In [19]:
scores = {'quizzes':89.3,'assignments':97.7,'midterm':75.45,'final':99.87}
series = pd.Series(data=scores)
series

quizzes        89.30
assignments    97.70
midterm        75.45
final          99.87
dtype: float64

In [20]:
colors = pd.Series(data=['red','red','green','blue',\
    'blue','red','red','green','blue'])
print(f'Frequencies\n{colors.value_counts()}')
colors.unique()

Frequencies
red      4
blue     3
green    2
dtype: int64


array(['red', 'green', 'blue'], dtype=object)

In [21]:
colors.value_counts()

red      4
blue     3
green    2
dtype: int64

### Pandas DataFrames

In [22]:
alphabet = string.ascii_uppercase
df = pd.DataFrame(np.random.randn(1000,4),\
            columns=list('abcd'))
df

Unnamed: 0,a,b,c,d
0,0.449697,-1.236549,1.421912,-1.928723
1,0.648078,0.234385,-0.718901,-0.668966
2,0.178554,0.371357,-1.029901,-1.309242
3,0.613950,0.920966,-0.470426,-0.267041
4,-0.421900,-0.611085,0.934652,0.590215
...,...,...,...,...
995,-1.222710,0.491947,-0.898868,-0.867675
996,1.359548,-0.806085,0.619217,-1.119447
997,1.007237,-0.311283,-2.659839,-1.728137
998,0.349643,1.409206,0.149829,-0.911503


In [23]:
df = pd.DataFrame(np.random.randn(4,10),index=list(alphabet)[:4],columns=list(alphabet[:10]))
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
A,-1.030123,0.984251,0.403531,0.556974,-2.273086,1.050234,-0.146117,-1.22553,-1.414626,-1.788116
B,0.49132,-1.892404,-1.337433,-2.287153,-1.011227,-0.474122,-1.038408,-0.718646,-0.569575,-0.622604
C,0.076485,-0.018897,0.649149,0.470931,1.590168,-0.751038,0.067366,0.347426,-0.618697,-0.785815
D,0.026729,2.461333,-1.924851,0.759412,1.038575,-1.412783,0.376021,1.528713,-1.162999,0.439236


In [24]:
pd.Series(['One','Two','Three','Four'])

0      One
1      Two
2    Three
3     Four
dtype: object

In [25]:
df['NewColumn']=['One','Two','Three','Four']
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,NewColumn
A,-1.030123,0.984251,0.403531,0.556974,-2.273086,1.050234,-0.146117,-1.22553,-1.414626,-1.788116,One
B,0.49132,-1.892404,-1.337433,-2.287153,-1.011227,-0.474122,-1.038408,-0.718646,-0.569575,-0.622604,Two
C,0.076485,-0.018897,0.649149,0.470931,1.590168,-0.751038,0.067366,0.347426,-0.618697,-0.785815,Three
D,0.026729,2.461333,-1.924851,0.759412,1.038575,-1.412783,0.376021,1.528713,-1.162999,0.439236,Four


In [26]:
df_1 = pd.DataFrame([np.arange(5),np.arange(5,10),np.arange(10,15)])
df_2 = pd.DataFrame(np.arange(15).reshape(3,5))
df_3 = pd.DataFrame(np.arange(15).reshape(5,3))
display(df_1,df_2,df_3)

Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14


Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14


Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


In [27]:
df = pd.DataFrame(np.arange(20).reshape(4,5))
df['Key']=['One','Two','Three','Four']
print(f'df=\n{df}')

df=
    0   1   2   3   4    Key
0   0   1   2   3   4    One
1   5   6   7   8   9    Two
2  10  11  12  13  14  Three
3  15  16  17  18  19   Four


In [28]:
df = pd.read_clipboard()
df.to_dict()

{'\\item': {},
 'What': {},
 'is': {},
 'the': {},
 'output': {},
 'of': {},
 'the.1': {},
 'following': {},
 '\\alert{Python}': {},
 'script?': {}}

In [29]:
population = { 'Q1 2022': {
  'Ontario': '14,951,825',
  'Manitoba': '1,390,249',
  'Saskatchewan': '1,183,269',
  'Alberta': '4,480,486',
  'British Columbia': '5,264,485',
  },
 'Q2 2022': {
  'Ontario': '15,007,816',
  'Manitoba': '1,393,179',
  'Saskatchewan': '1,186,308',
  'Alberta': '4,500,917',
  'British Columbia': '5,286,528',
  }}
df = pd.DataFrame(population)
df

Unnamed: 0,Q1 2022,Q2 2022
Ontario,14951825,15007816
Manitoba,1390249,1393179
Saskatchewan,1183269,1186308
Alberta,4480486,4500917
British Columbia,5264485,5286528


### Reading CSV Data

In [30]:
df = pd.read_csv('http://bit.ly/uforeports')
df.head()


Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [31]:
[x for x in dir(df) if x.startswith('to')]

['to_clipboard',
 'to_csv',
 'to_dict',
 'to_excel',
 'to_feather',
 'to_gbq',
 'to_hdf',
 'to_html',
 'to_json',
 'to_latex',
 'to_markdown',
 'to_numpy',
 'to_parquet',
 'to_period',
 'to_pickle',
 'to_records',
 'to_sql',
 'to_stata',
 'to_string',
 'to_timestamp',
 'to_xarray',
 'to_xml']

In [32]:
df.tail()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18236,Grant Park,,TRIANGLE,IL,12/31/2000 23:00
18237,Spirit Lake,,DISK,IA,12/31/2000 23:00
18238,Eagle River,,,WI,12/31/2000 23:45
18239,Eagle River,RED,LIGHT,WI,12/31/2000 23:45
18240,Ybor,,OVAL,FL,12/31/2000 23:59


In [33]:
df = pd.read_csv('data/arh.csv')
display(df.head(),df.tail())# display only in jupyter

Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
0,1,All alcohol conditions,,d z,SMR01,1997/98,Male,,All,d,...,690.6,,:,21462,,15840,,,:,1.4
1,2,All alcohol conditions,,d z,SMR01,1997/98,Female,,All,d,...,253.0,,:,8232,,6543,,,:,1.3
2,3,All alcohol conditions,,d z,SMR01,1998/99,Male,,All,d,...,701.3,,:,21930,,16125,,,:,1.4
3,4,All alcohol conditions,,d z,SMR01,1998/99,Female,,All,d,...,259.5,,:,8637,,6702,,,:,1.3
4,5,All alcohol conditions,,d z,SMR01,1999/00,Male,,All,d,...,736.9,,:,23637,,16977,,,:,1.4


Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
16207,16208,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2017/18,Female,,All,d,...,5.5,1.7,,213,,153,,48,,1.4
16208,16209,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2018/19,Male,,All,d,...,17.0,5.2,,648,,447,,138,,1.4
16209,16210,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2018/19,Female,,All,d,...,5.3,1.8,,207,,147,,51,,1.4
16210,16211,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Male,,All,d,...,15.6,4.9,,579,,408,,129,,1.4
16211,16212,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Female,,All,d,...,5.2,1.7,,186,,144,,48,,1.3


In [34]:
df = pd.read_html('https://en.wikipedia.org/wiki/Minnesota')
type(df)

list

### Reading Excel Files

In [35]:
df = pd.read_excel('data/arh.xlsx')
display(df.head(3),df.tail(3))

Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
0,1,All alcohol conditions,,d z,SMR01,1997/98,Male,,All,d,...,690.6,,:,21462,,15840,,,:,1.4
1,2,All alcohol conditions,,d z,SMR01,1997/98,Female,,All,d,...,253.0,,:,8232,,6543,,,:,1.3
2,3,All alcohol conditions,,d z,SMR01,1998/99,Male,,All,d,...,701.3,,:,21930,,16125,,,:,1.4


Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
16209,16210,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2018/19,Female,,All,d,...,5.3,1.8,,207,,147,,51,,1.4
16210,16211,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Male,,All,d,...,15.6,4.9,,579,,408,,129,,1.4
16211,16212,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Female,,All,d,...,5.2,1.7,,186,,144,,48,,1.3


### Reading Data from Clipboard

In [50]:
df = pd.read_clipboard()
display(df.head(3),df.tail(3))

Unnamed: 0,\item,What,is,the,output,of,the.1,following,\alert{Python},script?


Unnamed: 0,\item,What,is,the,output,of,the.1,following,\alert{Python},script?


In [37]:
print(df.head(),df.tail())

Empty DataFrame
Columns: [\item, What, is, the, output, of, the.1, following, \alert{Python}, script?]
Index: [] Empty DataFrame
Columns: [\item, What, is, the, output, of, the.1, following, \alert{Python}, script?]
Index: []


In [38]:
filename = 'data/arh.csv'
df = pd.read_csv(filename)
display(df.head(),df.tail())

Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
0,1,All alcohol conditions,,d z,SMR01,1997/98,Male,,All,d,...,690.6,,:,21462,,15840,,,:,1.4
1,2,All alcohol conditions,,d z,SMR01,1997/98,Female,,All,d,...,253.0,,:,8232,,6543,,,:,1.3
2,3,All alcohol conditions,,d z,SMR01,1998/99,Male,,All,d,...,701.3,,:,21930,,16125,,,:,1.4
3,4,All alcohol conditions,,d z,SMR01,1998/99,Female,,All,d,...,259.5,,:,8637,,6702,,,:,1.3
4,5,All alcohol conditions,,d z,SMR01,1999/00,Male,,All,d,...,736.9,,:,23637,,16977,,,:,1.4


Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
16207,16208,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2017/18,Female,,All,d,...,5.5,1.7,,213,,153,,48,,1.4
16208,16209,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2018/19,Male,,All,d,...,17.0,5.2,,648,,447,,138,,1.4
16209,16210,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2018/19,Female,,All,d,...,5.3,1.8,,207,,147,,51,,1.4
16210,16211,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Male,,All,d,...,15.6,4.9,,579,,408,,129,,1.4
16211,16212,Alcohol-induced pancreatitis,"K86.0, K85.2",,SMR01,2019/20,Female,,All,d,...,5.2,1.7,,186,,144,,48,,1.3


In [39]:
import os
MB = 2**20
size_in_disk = os.stat(filename).st_size/MB
size_in_ram = df.memory_usage('deep').sum()/MB
print(f'The size of the file in disk is {size_in_disk:.2f} MB')
print(f'The size of the file in ram is {size_in_ram:.2f} MB')

The size of the file in disk is 1.75 MB
The size of the file in ram is 2.60 MB


In [40]:
df.index

RangeIndex(start=0, stop=16212, step=1)

In [41]:
df.columns

Index(['_id', 'Condition', 'ICD10', 'ICD10QF', 'SMRType', 'FinancialYear',
       'Gender', 'GenderQF', 'AgeGroup', 'AgeGroupQF', 'EASRStays',
       'EASRPatients', 'EASRNewPatients', 'EASRNewPatientsQF', 'NumberOfStays',
       'NumberOfStaysQF', 'NumberOfPatients', 'NumberOfPatientsQF',
       'NumberOfNewPatients', 'NumberOfNewPatientsQF',
       'AverageNumberOfStaysPerPatient'],
      dtype='object')

In [42]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16212 entries, 0 to 16211
Data columns (total 21 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   _id                             16212 non-null  int64  
 1   Condition                       16212 non-null  object 
 2   ICD10                           16212 non-null  object 
 3   ICD10QF                         16212 non-null  object 
 4   SMRType                         16212 non-null  object 
 5   FinancialYear                   16212 non-null  object 
 6   Gender                          16212 non-null  object 
 7   GenderQF                        16212 non-null  object 
 8   AgeGroup                        16212 non-null  object 
 9   AgeGroupQF                      16212 non-null  object 
 10  EASRStays                       16212 non-null  float64
 11  EASRPatients                    16212 non-null  float64
 12  EASRNewPatients                 

In [43]:
df.describe()

Unnamed: 0,_id,EASRStays,EASRPatients,NumberOfStays,NumberOfPatients,AverageNumberOfStaysPerPatient
count,16212.0,16212.0,16212.0,16212.0,16212.0,16212.0
mean,8106.5,223.571139,160.489724,859.832161,622.840859,1.19261
std,4680.145617,350.885702,233.484349,2206.162604,1530.210146,0.434618
min,1.0,0.0,0.0,0.0,0.0,0.0
25%,4053.75,14.375,11.7,48.0,39.0,1.1
50%,8106.5,80.9,63.4,225.0,180.0,1.2
75%,12159.25,258.3,200.45,762.0,579.0,1.4
max,16212.0,2251.3,1459.8,33114.0,21513.0,5.0


In [44]:
df.memory_usage('deep')

Index                                128
_id                               129696
Condition                         129696
ICD10                             129696
ICD10QF                           129696
SMRType                           129696
FinancialYear                     129696
Gender                            129696
GenderQF                          129696
AgeGroup                          129696
AgeGroupQF                        129696
EASRStays                         129696
EASRPatients                      129696
EASRNewPatients                   129696
EASRNewPatientsQF                 129696
NumberOfStays                     129696
NumberOfStaysQF                   129696
NumberOfPatients                  129696
NumberOfPatientsQF                129696
NumberOfNewPatients               129696
NumberOfNewPatientsQF             129696
AverageNumberOfStaysPerPatient    129696
dtype: int64

In [45]:
df.isna()

Unnamed: 0,_id,Condition,ICD10,ICD10QF,SMRType,FinancialYear,Gender,GenderQF,AgeGroup,AgeGroupQF,...,EASRPatients,EASRNewPatients,EASRNewPatientsQF,NumberOfStays,NumberOfStaysQF,NumberOfPatients,NumberOfPatientsQF,NumberOfNewPatients,NumberOfNewPatientsQF,AverageNumberOfStaysPerPatient
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16207,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
16208,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
16209,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
16210,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [46]:
df.isna().sum()

_id                               0
Condition                         0
ICD10                             0
ICD10QF                           0
SMRType                           0
FinancialYear                     0
Gender                            0
GenderQF                          0
AgeGroup                          0
AgeGroupQF                        0
EASRStays                         0
EASRPatients                      0
EASRNewPatients                   0
EASRNewPatientsQF                 0
NumberOfStays                     0
NumberOfStaysQF                   0
NumberOfPatients                  0
NumberOfPatientsQF                0
NumberOfNewPatients               0
NumberOfNewPatientsQF             0
AverageNumberOfStaysPerPatient    0
dtype: int64

In [47]:
df.duplicated()

0        False
1        False
2        False
3        False
4        False
         ...  
16207    False
16208    False
16209    False
16210    False
16211    False
Length: 16212, dtype: bool

In [48]:
df = pd.read_html('https://en.wikipedia.org/wiki/Population_of_Canada_by_province_and_territory')
df

[   Population                    Name[1] Population,2021 Census             \
    Population                    Name[1]                  Total Proportion   
 0           1                    Ontario               14223942     38.45%   
 1           2                     Quebec                8501833     22.98%   
 2           3           British Columbia                5000879     13.52%   
 3           4                    Alberta                4262635     11.52%   
 4           5                   Manitoba                1342153      3.63%   
 5           6               Saskatchewan                1132505      3.06%   
 6           7                Nova Scotia                 969383      2.62%   
 7           8              New Brunswick                 775610      2.09%   
 8           9  Newfoundland and Labrador                 510550      1.38%   
 9          10       Prince Edward Island                 154331      0.42%   
 10         11      Northwest Territories           

In [49]:
df[0]

Unnamed: 0_level_0,Population,Name[1],"Population,2021 Census","Population,2021 Census","Growth,2016–21",Land area(km2),Populationdensity(per km2),House ofCommons seats,House ofCommons seats,Senate seats,Senate seats
Unnamed: 0_level_1,Population,Name[1],Total,Proportion,"Growth,2016–21",Land area(km2),Populationdensity(per km2),Total,Proportion,Total,Proportion
0,1,Ontario,14223942,38.45%,5.8%,908699.33,15.2,121,35.8%,24,22.86%
1,2,Quebec,8501833,22.98%,4.1%,1356625.27,6.5,78,23.1%,24,22.86%
2,3,British Columbia,5000879,13.52%,7.6%,922503.01,5.4,42,12.4%,6,5.71%
3,4,Alberta,4262635,11.52%,4.8%,640330.46,6.7,34,10.1%,6,5.71%
4,5,Manitoba,1342153,3.63%,5.8%,552370.99,2.3,14,4.1%,6,5.71%
5,6,Saskatchewan,1132505,3.06%,3.4%,588243.54,2.0,14,4.1%,6,5.71%
6,7,Nova Scotia,969383,2.62%,5.0%,52942.27,18.4,11,3.3%,10,9.52%
7,8,New Brunswick,775610,2.09%,3.8%,71388.81,10.9,10,3.0%,10,9.52%
8,9,Newfoundland and Labrador,510550,1.38%,-1.8%,370514.08,1.4,7,2.1%,6,5.71%
9,10,Prince Edward Island,154331,0.42%,8.0%,5686.03,27.2,4,1.2%,4,3.81%


### Data Filtring

In [61]:
movies = pd.read_csv('http://bit.ly/imdbratings')
movies.to_csv('data/movies.csv',index=False)

In [57]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 979 entries, 0 to 978
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   star_rating     979 non-null    float64
 1   title           979 non-null    object 
 2   content_rating  976 non-null    object 
 3   genre           979 non-null    object 
 4   duration        979 non-null    int64  
 5   actors_list     979 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 46.0+ KB


In [58]:
movies.shape

(979, 6)

In [59]:
movies.genre.value_counts()

Drama        278
Comedy       156
Action       136
Crime        124
Biography     77
Adventure     75
Animation     62
Horror        29
Mystery       16
Western        9
Sci-Fi         5
Thriller       5
Film-Noir      3
Family         2
History        1
Fantasy        1
Name: genre, dtype: int64

In [60]:
movies['genre'].value_counts()

Drama        278
Comedy       156
Action       136
Crime        124
Biography     77
Adventure     75
Animation     62
Horror        29
Mystery       16
Western        9
Sci-Fi         5
Thriller       5
Film-Noir      3
Family         2
History        1
Fantasy        1
Name: genre, dtype: int64

In [63]:
movies.star_rating.value_counts()

7.6    124
7.8    116
7.7    113
7.5    108
8.1    103
8.0     97
7.9     75
8.2     51
7.4     49
8.3     43
8.4     37
8.5     23
8.6     15
8.7     10
8.9      6
8.8      5
9.2      1
9.0      1
9.1      1
9.3      1
Name: star_rating, dtype: int64

In [64]:
movies.loc[0:3,:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."


In [65]:
movies.loc[:,'title']

0                             The Shawshank Redemption
1                                        The Godfather
2                               The Godfather: Part II
3                                      The Dark Knight
4                                         Pulp Fiction
                            ...                       
974                                            Tootsie
975                        Back to the Future Part III
976    Master and Commander: The Far Side of the World
977                                        Poltergeist
978                                        Wall Street
Name: title, Length: 979, dtype: object

In [67]:
movies.loc[:,['title','star_rating']]

Unnamed: 0,title,star_rating
0,The Shawshank Redemption,9.3
1,The Godfather,9.2
2,The Godfather: Part II,9.1
3,The Dark Knight,9.0
4,Pulp Fiction,8.9
...,...,...
974,Tootsie,7.4
975,Back to the Future Part III,7.4
976,Master and Commander: The Far Side of the World,7.4
977,Poltergeist,7.4


In [71]:
movies.loc[975:978,:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
975,7.4,Back to the Future Part III,PG,Adventure,118,"[u'Michael J. Fox', u'Christopher Lloyd', u'Ma..."
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."
978,7.4,Wall Street,R,Crime,126,"[u'Charlie Sheen', u'Michael Douglas', u'Tamar..."


In [74]:
movies.loc[[x for x in movies.index if x % 2 == 1],['title','star_rating']]

Unnamed: 0,title,star_rating
1,The Godfather,9.2
3,The Dark Knight,9.0
5,12 Angry Men,8.9
7,The Lord of the Rings: The Return of the King,8.9
9,Fight Club,8.9
...,...,...
969,Law Abiding Citizen,7.4
971,Death at a Funeral,7.4
973,The Cider House Rules,7.4
975,Back to the Future Part III,7.4


In [75]:
movies.iloc[0:5,1:3]

Unnamed: 0,title,content_rating
0,The Shawshank Redemption,R
1,The Godfather,R
2,The Godfather: Part II,R
3,The Dark Knight,PG-13
4,Pulp Fiction,R


In [76]:
movies.iloc[-3:,:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."
978,7.4,Wall Street,R,Crime,126,"[u'Charlie Sheen', u'Michael Douglas', u'Tamar..."


In [78]:
movies.iloc[-3:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."
978,7.4,Wall Street,R,Crime,126,"[u'Charlie Sheen', u'Michael Douglas', u'Tamar..."


In [77]:
movies.iloc[:,['title','star_rating']] 

IndexError: .iloc requires numeric indexers, got ['title' 'star_rating']

In [82]:
movies.iloc[:,2].unique()

array(['R', 'PG-13', 'NOT RATED', 'PG', 'UNRATED', 'APPROVED', 'PASSED',
       'G', 'X', nan, 'TV-MA', 'GP', 'NC-17'], dtype=object)

In [83]:
movies.iloc[[x for x in movies.index if x % 2 == 1],\
            [1,0]]

Unnamed: 0,title,star_rating
1,The Godfather,9.2
3,The Dark Knight,9.0
5,12 Angry Men,8.9
7,The Lord of the Rings: The Return of the King,8.9
9,Fight Club,8.9
...,...,...
969,Law Abiding Citizen,7.4
971,Death at a Funeral,7.4
973,The Cider House Rules,7.4
975,Back to the Future Part III,7.4


In [84]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.to_csv('data/drinks.csv',index=False)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [85]:
movies.loc[:,[1,0]]

KeyError: "None of [Int64Index([1, 0], dtype='int64')] are in the [columns]"

In [89]:
dir(movies)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex

In [90]:
[x for x in dir(movies) if x.startswith('i')]

['iat',
 'idxmax',
 'idxmin',
 'iloc',
 'index',
 'infer_objects',
 'info',
 'insert',
 'interpolate',
 'isin',
 'isna',
 'isnull',
 'items',
 'iteritems',
 'iterrows',
 'itertuples']

In [99]:
#condition = movies.genre == 'crime'
condition = movies['genre'] == 'Action'
condition

0      False
1      False
2      False
3       True
4      False
       ...  
974    False
975    False
976     True
977    False
978    False
Name: genre, Length: 979, dtype: bool

In [100]:
movies[condition]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
11,8.8,Inception,PG-13,Action,148,"[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'..."
12,8.8,Star Wars: Episode V - The Empire Strikes Back,PG,Action,124,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
19,8.7,Star Wars,PG,Action,121,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
20,8.7,The Matrix,R,Action,136,"[u'Keanu Reeves', u'Laurence Fishburne', u'Car..."
...,...,...,...,...,...,...
918,7.5,Running Scared,R,Action,122,"[u'Paul Walker', u'Cameron Bright', u'Chazz Pa..."
954,7.4,X-Men,PG-13,Action,104,"[u'Patrick Stewart', u'Hugh Jackman', u'Ian Mc..."
963,7.4,La Femme Nikita,R,Action,118,"[u'Anne Parillaud', u'Marc Duret', u'Patrick F..."
967,7.4,The Rock,R,Action,136,"[u'Sean Connery', u'Nicolas Cage', u'Ed Harris']"


In [102]:
movies.loc[condition,:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
11,8.8,Inception,PG-13,Action,148,"[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'..."
12,8.8,Star Wars: Episode V - The Empire Strikes Back,PG,Action,124,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
19,8.7,Star Wars,PG,Action,121,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
20,8.7,The Matrix,R,Action,136,"[u'Keanu Reeves', u'Laurence Fishburne', u'Car..."
...,...,...,...,...,...,...
918,7.5,Running Scared,R,Action,122,"[u'Paul Walker', u'Cameron Bright', u'Chazz Pa..."
954,7.4,X-Men,PG-13,Action,104,"[u'Patrick Stewart', u'Hugh Jackman', u'Ian Mc..."
963,7.4,La Femme Nikita,R,Action,118,"[u'Anne Parillaud', u'Marc Duret', u'Patrick F..."
967,7.4,The Rock,R,Action,136,"[u'Sean Connery', u'Nicolas Cage', u'Ed Harris']"


In [106]:
long_movies = movies.duration >= 120
movies[long_movies].loc[:,'star_rating':'duration']

Unnamed: 0,star_rating,title,content_rating,genre,duration
0,9.3,The Shawshank Redemption,R,Crime,142
1,9.2,The Godfather,R,Crime,175
2,9.1,The Godfather: Part II,R,Crime,200
3,9.0,The Dark Knight,PG-13,Action,152
4,8.9,Pulp Fiction,R,Crime,154
...,...,...,...,...,...
967,7.4,The Rock,R,Action,136
968,7.4,The English Patient,R,Drama,162
973,7.4,The Cider House Rules,PG-13,Drama,126
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138


In [104]:
movies.loc[long_movies,'star_rating':'duration']

Unnamed: 0,star_rating,title,content_rating,genre,duration
0,9.3,The Shawshank Redemption,R,Crime,142
1,9.2,The Godfather,R,Crime,175
2,9.1,The Godfather: Part II,R,Crime,200
3,9.0,The Dark Knight,PG-13,Action,152
4,8.9,Pulp Fiction,R,Crime,154
...,...,...,...,...,...
967,7.4,The Rock,R,Action,136
968,7.4,The English Patient,R,Drama,162
973,7.4,The Cider House Rules,PG-13,Drama,126
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138


In [119]:
condition = (movies.genre == 'Drama') & (movies.content_rating == 'R')
movies[condition]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
16,8.7,One Flew Over the Cuckoo's Nest,R,Drama,133,"[u'Jack Nicholson', u'Louise Fletcher', u'Mich..."
24,8.7,Se7en,R,Drama,127,"[u'Morgan Freeman', u'Brad Pitt', u'Kevin Spac..."
27,8.6,The Silence of the Lambs,R,Drama,118,"[u'Jodie Foster', u'Anthony Hopkins', u'Lawren..."
33,8.6,Whiplash,R,Drama,107,"[u'Miles Teller', u'J.K. Simmons', u'Melissa B..."
...,...,...,...,...,...,...
945,7.4,Take Shelter,R,Drama,120,"[u'Michael Shannon', u'Jessica Chastain', u'Sh..."
951,7.4,Sleepy Hollow,R,Drama,105,"[u'Johnny Depp', u'Christina Ricci', u'Miranda..."
955,7.4,Zero Dark Thirty,R,Drama,157,"[u'Jessica Chastain', u'Joel Edgerton', u'Chri..."
968,7.4,The English Patient,R,Drama,162,"[u'Ralph Fiennes', u'Juliette Binoche', u'Will..."


In [121]:
movies.loc[condition,:]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."
16,8.7,One Flew Over the Cuckoo's Nest,R,Drama,133,"[u'Jack Nicholson', u'Louise Fletcher', u'Mich..."
24,8.7,Se7en,R,Drama,127,"[u'Morgan Freeman', u'Brad Pitt', u'Kevin Spac..."
27,8.6,The Silence of the Lambs,R,Drama,118,"[u'Jodie Foster', u'Anthony Hopkins', u'Lawren..."
33,8.6,Whiplash,R,Drama,107,"[u'Miles Teller', u'J.K. Simmons', u'Melissa B..."
...,...,...,...,...,...,...
945,7.4,Take Shelter,R,Drama,120,"[u'Michael Shannon', u'Jessica Chastain', u'Sh..."
951,7.4,Sleepy Hollow,R,Drama,105,"[u'Johnny Depp', u'Christina Ricci', u'Miranda..."
955,7.4,Zero Dark Thirty,R,Drama,157,"[u'Jessica Chastain', u'Joel Edgerton', u'Chri..."
968,7.4,The English Patient,R,Drama,162,"[u'Ralph Fiennes', u'Juliette Binoche', u'Will..."


In [122]:
condition = (movies.genre == 'Drama') & (movies.duration > 200)
movies[condition]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
157,8.2,Gone with the Wind,G,Drama,238,"[u'Clark Gable', u'Vivien Leigh', u'Thomas Mit..."
476,7.8,Hamlet,PG-13,Drama,242,"[u'Kenneth Branagh', u'Julie Christie', u'Dere..."


In [124]:
condition = (movies.duration <90) | (movies.duration > 200)
movies[condition]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
17,8.7,Seven Samurai,UNRATED,Drama,207,"[u'Toshir\xf4 Mifune', u'Takashi Shimura', u'K..."
29,8.6,City Lights,PASSED,Comedy,87,"[u'Charles Chaplin', u'Virginia Cherrill', u'F..."
35,8.6,Modern Times,G,Comedy,87,"[u'Charles Chaplin', u'Paulette Goddard', u'He..."
55,8.5,The Lion King,G,Animation,89,"[u'Matthew Broderick', u'Jeremy Irons', u'Jame..."
...,...,...,...,...,...,...
892,7.5,Fruitvale Station,R,Biography,85,"[u'Michael B. Jordan', u'Melonie Diaz', u'Octa..."
938,7.4,Alice in Wonderland,G,Animation,75,"[u'Kathryn Beaumont', u'Ed Wynn', u'Richard Ha..."
947,7.4,Eraserhead,UNRATED,Drama,89,"[u'Jack Nance', u'Charlotte Stewart', u'Allen ..."
948,7.4,Frances Ha,R,Comedy,86,"[u'Greta Gerwig', u'Mickey Sumner', u'Adam Dri..."


In [128]:
movies.sort_values(ascending=False,by='star_rating')[0:10]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",NOT RATED,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
7,8.9,The Lord of the Rings: The Return of the King,PG-13,Adventure,201,"[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK..."
8,8.9,Schindler's List,R,Biography,195,"[u'Liam Neeson', u'Ralph Fiennes', u'Ben Kings..."
9,8.9,Fight Club,R,Drama,139,"[u'Brad Pitt', u'Edward Norton', u'Helena Bonh..."


In [129]:
movies.sort_values(by='star_rating')[0:10]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
978,7.4,Wall Street,R,Crime,126,"[u'Charlie Sheen', u'Michael Douglas', u'Tamar..."
950,7.4,Bound,R,Crime,108,"[u'Jennifer Tilly', u'Gina Gershon', u'Joe Pan..."
949,7.4,Home Alone,PG,Comedy,103,"[u'Macaulay Culkin', u'Joe Pesci', u'Daniel St..."
948,7.4,Frances Ha,R,Comedy,86,"[u'Greta Gerwig', u'Mickey Sumner', u'Adam Dri..."
947,7.4,Eraserhead,UNRATED,Drama,89,"[u'Jack Nance', u'Charlotte Stewart', u'Allen ..."
946,7.4,Far from Heaven,PG-13,Drama,107,"[u'Julianne Moore', u'Dennis Quaid', u'Dennis ..."
945,7.4,Take Shelter,R,Drama,120,"[u'Michael Shannon', u'Jessica Chastain', u'Sh..."
944,7.4,Terms of Endearment,R,Comedy,132,"[u'Shirley MacLaine', u'Debra Winger', u'Jack ..."
943,7.4,The Bucket List,PG-13,Adventure,97,"[u'Jack Nicholson', u'Morgan Freeman', u'Sean ..."
942,7.4,Raising Arizona,PG-13,Comedy,94,"[u'Nicolas Cage', u'Holly Hunter', u'Trey Wils..."
