# Data Analysis
*Reference Notebook for Data Analysis in Python*

## Package Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sqlalchemy import create_engine, text

## Data Wrangling

### Data Sources

- CSV
- Excel
- JSON
- Database (SQL)
- Database (NoSQL)
- Web (HTML)
- API
- Python Dictionary
- Python List
- NumPy Array
- Pandas Series

In [11]:
# Input locations used by the data-loading examples in this notebook

# Local sample files
csv_filename = 'data/2020_CFS.csv'
json_filename = 'data/2020_CFS.json'
excel_filename = 'data/2020_CFS.xlsx'

# Local SQLite database
sql_filename = 'data/Northwind_small.sqlite'

# Full-size CSV used in the exploration section
full_data = 'data/Call_for_Service_2020.csv'

# Remote CSV served over HTTPS
url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'

In [3]:
# DataFrame from CSV
# The keyword arguments below are spelled out for reference; most are the defaults.
data = pd.read_csv(
    filepath_or_buffer=csv_filename,
    sep=',',
    header=0,
    names=None,  # array-like for column names if no headers in data
    index_col=None,
    usecols=None,  # list-like or callable to return subset of columns
    dtype=None,  # type-name or dict of column: type
    skiprows=None,
    nrows=None,
    na_values=None,  # additional strings to recognize as NA/NaN
    # on_bad_lines replaces error_bad_lines / warn_bad_lines, which were
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    on_bad_lines='error',  # 'error': raise on malformed lines; 'warn': skip with a warning; 'skip': skip silently
)
data.head()

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000120,94F,FIREWORKS,1A,103,DISTURBANCE (OTHER),1C,3677228,550814,01/01/2020 12:00:34 AM,...,,01/01/2020 06:53:08 AM,NAT,Necessary Action Taken,N,3N01,001XX Blk Riviera Ave,70122,3,POINT (-90.0808922 30.0086791)
1,A0000220,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3668710,533007,01/01/2020 12:00:42 AM,...,01/01/2020 12:00:42 AM,01/01/2020 01:37:16 AM,NAT,Necessary Action Taken,Y,2U04,034XX Broadway St,70125,2,POINT (-90.10840522 29.95996774)
2,A0000320,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3674930,533982,01/01/2020 12:01:05 AM,...,01/01/2020 02:08:17 AM,01/01/2020 02:34:36 AM,NAT,Necessary Action Taken,N,1H02,026XX Banks St,70119,1,POINT (-90.08872937 29.96246347)
3,A0000420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3681805,536653,01/01/2020 12:02:50 AM,...,01/01/2020 12:09:13 AM,01/01/2020 12:13:45 AM,GOA,GONE ON ARRIVAL,N,1A01,Kerlerec St & N Robertson St,70116,1,POINT (-90.0669267 29.96960271)
4,A0000520,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3668697,542174,01/01/2020 12:03:46 AM,...,,01/01/2020 12:42:13 AM,NAT,Necessary Action Taken,N,3I01,053XX Memphis St,70124,3,POINT (-90.10813674 29.98517428)


In [4]:
# DataFrame from Web (semicolon-delimited CSV read directly from a URL)
data = pd.read_csv(filepath_or_buffer=url, sep=';')
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [5]:
# DataFrame from Excel
# An Excel engine package must be installed: openpyxl reads .xlsx on current
# pandas; xlrd reads legacy .xls (older pandas releases also used it for .xlsx)
data = pd.read_excel(
    excel_filename,
    sheet_name=0,  # str, int, list, or None (read all sheets)
)
data.head()

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000120,94F,FIREWORKS,1A,103,DISTURBANCE (OTHER),1C,3677228,550814,2020-01-01 00:00:34,...,NaT,2020-01-01 06:53:08,NAT,Necessary Action Taken,N,3N01,001XX Blk Riviera Ave,70122,3,POINT (-90.0808922 30.0086791)
1,A0000220,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3668710,533007,2020-01-01 00:00:42,...,2020-01-01 00:00:42,2020-01-01 01:37:16,NAT,Necessary Action Taken,Y,2U04,034XX Broadway St,70125,2,POINT (-90.10840522 29.95996774)
2,A0000320,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3674930,533982,2020-01-01 00:01:05,...,2020-01-01 02:08:17,2020-01-01 02:34:36,NAT,Necessary Action Taken,N,1H02,026XX Banks St,70119,1,POINT (-90.08872937 29.96246347)
3,A0000420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3681805,536653,2020-01-01 00:02:50,...,2020-01-01 00:09:13,2020-01-01 00:13:45,GOA,GONE ON ARRIVAL,N,1A01,Kerlerec St & N Robertson St,70116,1,POINT (-90.0669267 29.96960271)
4,A0000520,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3668697,542174,2020-01-01 00:03:46,...,NaT,2020-01-01 00:42:13,NAT,Necessary Action Taken,N,3I01,053XX Memphis St,70124,3,POINT (-90.10813674 29.98517428)


In [6]:
# DataFrame from JSON (path to a JSON file on disk)
data = pd.read_json(path_or_buf=json_filename)
data.head()

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000120,94F,FIREWORKS,1A,103,DISTURBANCE (OTHER),1C,3677228,550814,01/01/2020 12:00:34 AM,...,,01/01/2020 06:53:08 AM,NAT,Necessary Action Taken,N,3N01,001XX Blk Riviera Ave,70122,3,POINT (-90.0808922 30.0086791)
1,A0000220,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3668710,533007,01/01/2020 12:00:42 AM,...,01/01/2020 12:00:42 AM,01/01/2020 01:37:16 AM,NAT,Necessary Action Taken,Y,2U04,034XX Broadway St,70125,2,POINT (-90.10840522 29.95996774)
2,A0000320,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3674930,533982,01/01/2020 12:01:05 AM,...,01/01/2020 02:08:17 AM,01/01/2020 02:34:36 AM,NAT,Necessary Action Taken,N,1H02,026XX Banks St,70119,1,POINT (-90.08872937 29.96246347)
3,A0000420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3681805,536653,01/01/2020 12:02:50 AM,...,01/01/2020 12:09:13 AM,01/01/2020 12:13:45 AM,GOA,GONE ON ARRIVAL,N,1A01,Kerlerec St & N Robertson St,70116,1,POINT (-90.0669267 29.96960271)
4,A0000520,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3668697,542174,01/01/2020 12:03:46 AM,...,,01/01/2020 12:42:13 AM,NAT,Necessary Action Taken,N,3I01,053XX Memphis St,70124,3,POINT (-90.10813674 29.98517428)


In [7]:
# DataFrame from Database (SQL)
# The SQLAlchemy URL for SQLite is 'sqlite:///' followed by the database file path
engine = create_engine('sqlite:///' + sql_filename)
data = pd.read_sql(
    'SELECT * FROM Customer',  # sql: query or table name
    engine,  # con: engine or connection to run the query on
    index_col=None,
    params=None,  # for passing to SQL (driver dependent)
    columns=None,  # only when reading table
)
data.head()

Unnamed: 0,Id,CompanyName,ContactName,ContactTitle,Address,City,Region,PostalCode,Country,Phone,Fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,Western Europe,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,Central America,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,Central America,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,British Isles,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,Northern Europe,S-958 22,Sweden,0921-12 34 65,0921-12 34 67


In [8]:
# DataFrame from Database (SQL) - Alternate Method
# Runs the query on an explicit connection and builds the DataFrame by hand.
engine = create_engine('sqlite:///' + sql_filename)

with engine.connect() as con:
    # Raw SQL must be wrapped in text(): SQLAlchemy 2.0 no longer accepts a
    # plain str in Connection.execute() (deprecated since 1.4).
    rs = con.execute(text('SELECT * FROM Customer'))
    # Pass the column names to the constructor so that an empty result set
    # still produces a correctly labelled, zero-row DataFrame.
    data = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

data.head()

Unnamed: 0,Id,CompanyName,ContactName,ContactTitle,Address,City,Region,PostalCode,Country,Phone,Fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,Western Europe,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,Central America,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,Central America,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,British Isles,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,Northern Europe,S-958 22,Sweden,0921-12 34 65,0921-12 34 67


In [18]:
# TODO
# DataFrame from API

In [9]:
# DataFrame from Python Dictionary
# Each key becomes a column label; each list supplies that column's values.
d = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

data = pd.DataFrame(data=d)
data.head()

Unnamed: 0,country,capital,area,population
0,Brazil,Brasilia,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


In [10]:
# DataFrame from Pandas Series
d = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
     "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
     "area": [8.516, 17.10, 3.286, 9.597, 1.221],
     "population": [200.4, 143.5, 1252, 1357, 52.98]}

# Build one named Series per column directly from the raw lists
# (no throwaway DataFrame is needed just to obtain Series objects).
country = pd.Series(d["country"], name="country")
capital = pd.Series(d["capital"], name="capital")
area = pd.Series(d["area"], name="area")
population = pd.Series(d["population"], name="population")

# axis=1 lays the Series side by side; each Series name becomes a column label.
data = pd.concat([country, capital, area, population], axis=1)
data.head()

Unnamed: 0,country,capital,area,population
0,Brazil,Brasilia,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Delhi,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


### Basic Exploration

In [28]:
# Use full dataset (replaces the small sample loaded in the cells above)
# NOTE(review): the previews below show Zip as both 70122 and 70112.0, which
# looks like mixed dtypes within that column — consider passing an explicit
# dtype for Zip (e.g. dtype={'Zip': str}); confirm against the raw file.
data = pd.read_csv(full_data)

In [52]:
# Preview top of DataFrame (first 5 rows by default)
data.head()

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000120,94F,FIREWORKS,1A,103,DISTURBANCE (OTHER),1C,3677228,550814,01/01/2020 12:00:34 AM,...,,01/01/2020 06:53:08 AM,NAT,Necessary Action Taken,N,3N01,001XX Blk Riviera Ave,70122,3,POINT (-90.0808922 30.0086791)
1,A0000220,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3668710,533007,01/01/2020 12:00:42 AM,...,01/01/2020 12:00:42 AM,01/01/2020 01:37:16 AM,NAT,Necessary Action Taken,Y,2U04,034XX Broadway St,70125,2,POINT (-90.10840522 29.95996774)
2,A0000320,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3674930,533982,01/01/2020 12:01:05 AM,...,01/01/2020 02:08:17 AM,01/01/2020 02:34:36 AM,NAT,Necessary Action Taken,N,1H02,026XX Banks St,70119,1,POINT (-90.08872937 29.96246347)
3,A0000420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3681805,536653,01/01/2020 12:02:50 AM,...,01/01/2020 12:09:13 AM,01/01/2020 12:13:45 AM,GOA,GONE ON ARRIVAL,N,1A01,Kerlerec St & N Robertson St,70116,1,POINT (-90.0669267 29.96960271)
4,A0000520,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3668697,542174,01/01/2020 12:03:46 AM,...,,01/01/2020 12:42:13 AM,NAT,Necessary Action Taken,N,3I01,053XX Memphis St,70124,3,POINT (-90.10813674 29.98517428)


In [53]:
# Preview bottom of DataFrame (last 5 rows by default)
data.tail()

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
407205,L0647220,22A,AREA CHECK,1K,22A,AREA CHECK,1K,0,0,12/06/2020 06:01:02 AM,...,12/06/2020 06:01:02 AM,12/06/2020 06:33:37 AM,NAT,Necessary Action Taken,Y,,Palm & Eagle Street,,0,POINT (0 0)
407206,L0652120,103,DISTURBANCE (OTHER),1C,103,DISTURBANCE (OTHER),1C,3680895,532769,12/06/2020 07:48:54 AM,...,12/06/2020 07:51:23 AM,12/06/2020 07:57:39 AM,NAT,Necessary Action Taken,N,1E04,Saint Louis St & N Rampart St,70112.0,1,POINT (-90.06993714 29.95895047)
407207,L0711920,22A,AREA CHECK,1K,22A,AREA CHECK,1K,0,0,12/06/2020 07:20:26 PM,...,12/06/2020 07:20:26 PM,12/06/2020 07:40:40 PM,NAT,Necessary Action Taken,Y,,Solomon Place Street & Saint Ann Street,,0,POINT (0 0)
407208,L0785720,67,THEFT,0A,67,THEFT,1G,3679908,529296,12/07/2020 11:15:01 AM,...,12/07/2020 05:16:22 PM,12/07/2020 05:22:24 PM,GOA,GONE ON ARRIVAL,N,8H02,005XX Baronne St,70113.0,8,POINT (-90.07317217 29.94942892)
407209,L0844420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3691801,526815,12/07/2020 08:19:05 PM,...,12/07/2020 08:41:03 PM,12/07/2020 08:44:07 PM,GOA,GONE ON ARRIVAL,N,4I01,012XX Blk Numa St,70114.0,4,POINT (-90.03571121 29.94224183)


In [54]:
# Display index (the row labels)
data.index

RangeIndex(start=0, stop=407210, step=1)

In [55]:
# Display columns (the column labels)
data.columns

Index(['NOPD_Item', 'Type', 'TypeText', 'Priority', 'InitialType',
       'InitialTypeText', 'InitialPriority', 'MapX', 'MapY', 'TimeCreate',
       'TimeDispatch', 'TimeArrive', 'TimeClosed', 'Disposition',
       'DispositionText', 'SelfInitiated', 'Beat', 'BLOCK_ADDRESS', 'Zip',
       'PoliceDistrict', 'Location'],
      dtype='object')

In [56]:
# Summary Statistics (of numeric columns):
# count, mean, std, min, 25%/50%/75% quantiles, and max
data.describe()

Unnamed: 0,MapX,MapY,PoliceDistrict
count,407210.0,407210.0,407210.0
mean,4959663.0,648445.7,4.280172
std,6522579.0,578164.8,2.320668
min,0.0,0.0,0.0
25%,3672414.0,528135.0,2.0
50%,3681015.0,535750.0,4.0
75%,3694298.0,548878.0,6.0
max,37369000.0,3513814.0,8.0


In [59]:
# Sorting
#data.sort_index()

In [61]:
# Sort by values (ascending by default)
# Returns a new sorted DataFrame; `data` itself is left unchanged
data.sort_values(by='PoliceDistrict')

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
370983,K3291220,22A,AREA CHECK,1K,22A,AREA CHECK,1K,0,0,11/25/2020 11:58:32 PM,...,11/25/2020 11:58:32 PM,11/26/2020 01:33:25 AM,NAT,Necessary Action Taken,Y,,Olive & Hamilton Street,,0,POINT (0 0)
365795,J1667820,TS,TRAFFIC STOP,1J,TS,TRAFFIC STOP,1J,37369000,3513814,10/13/2020 09:00:42 PM,...,10/13/2020 09:00:42 PM,10/13/2020 09:01:07 PM,VOI,VOID,Y,1G01,Approx Loc: Poydras St,70112,0,POINT (0 0)
4816,D2540420,30S,HOMICIDE BY SHOOTING,2C,37,AGGRAVATED ASSAULT,2E,37369000,3513814,04/21/2020 09:33:59 PM,...,04/21/2020 09:36:30 PM,04/22/2020 12:24:03 AM,RTF,REPORT TO FOLLOW,N,6Q02,036XX Loyola Ave,,0,POINT (0 0)
145409,B0073220,62C,SIMPLE BURGLARY VEHICLE,1G,62C,SIMPLE BURGLARY VEHICLE,1G,37369000,3513814,02/01/2020 02:20:15 PM,...,02/01/2020 03:24:50 PM,02/01/2020 03:59:33 PM,RTF,REPORT TO FOLLOW,N,6O02,005XX Peniston St,70115,0,POINT (0 0)
397917,K1647420,22A,AREA CHECK,1K,22A,AREA CHECK,1K,0,0,11/13/2020 11:44:04 AM,...,11/13/2020 11:44:04 AM,11/13/2020 01:57:02 PM,NAT,Necessary Action Taken,Y,,Essex & Sullen,,0,POINT (0 0)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102039,A0620020,22A,AREA CHECK,1K,22A,AREA CHECK,1K,3681724,526932,01/06/2020 01:23:20 AM,...,01/06/2020 01:47:47 AM,01/06/2020 02:48:42 AM,NAT,Necessary Action Taken,N,8G01,N Diamond St & Tchoupitoulas St,70130,8,POINT (-90.06752224 29.94287546)
102035,A0423620,TS,TRAFFIC STOP,1J,TS,TRAFFIC STOP,1J,3682722,535088,01/04/2020 10:57:22 AM,...,01/04/2020 10:57:59 AM,01/04/2020 11:09:24 AM,NAT,Necessary Action Taken,N,8E02,N Rampart St & Barracks St,70116,8,POINT (-90.06408601 29.96526949)
147816,B1821920,62A,"BURGLAR ALARM, SILENT",1A,62A,"BURGLAR ALARM, SILENT",2E,3680175,528701,02/14/2020 04:31:20 PM,...,02/14/2020 04:36:54 PM,02/14/2020 04:42:33 PM,NAT,Necessary Action Taken,N,8H01,006XX Carondelet St,70130,8,POINT (-90.07235001 29.94778545)
374397,J3900520,58,RETURN FOR ADDITIONAL INFO,1I,58,RETURN FOR ADDITIONAL INFO,1I,3677702,531070,10/30/2020 10:05:45 AM,...,10/30/2020 10:05:51 AM,10/30/2020 12:13:35 PM,NAT,Necessary Action Taken,N,8J02,016XX Perdido St,70112,8,POINT (-90.08007699 29.95437505)


In [62]:
# Selection
# for production code use: .at, .iat, .loc, and .iloc

In [64]:
# Single column (returned as a Series)
data['Type']  # equivalent to data.Type

0         94F
1          21
2         94F
3          94
4         94F
         ... 
407205    22A
407206    103
407207    22A
407208     67
407209     94
Name: Type, Length: 407210, dtype: object

In [65]:
# Slice rows: an integer slice inside [] selects rows by position (here the first 5)
data[:5]

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000120,94F,FIREWORKS,1A,103,DISTURBANCE (OTHER),1C,3677228,550814,01/01/2020 12:00:34 AM,...,,01/01/2020 06:53:08 AM,NAT,Necessary Action Taken,N,3N01,001XX Blk Riviera Ave,70122,3,POINT (-90.0808922 30.0086791)
1,A0000220,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3668710,533007,01/01/2020 12:00:42 AM,...,01/01/2020 12:00:42 AM,01/01/2020 01:37:16 AM,NAT,Necessary Action Taken,Y,2U04,034XX Broadway St,70125,2,POINT (-90.10840522 29.95996774)
2,A0000320,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3674930,533982,01/01/2020 12:01:05 AM,...,01/01/2020 02:08:17 AM,01/01/2020 02:34:36 AM,NAT,Necessary Action Taken,N,1H02,026XX Banks St,70119,1,POINT (-90.08872937 29.96246347)
3,A0000420,94,DISCHARGING FIREARM,2D,94,DISCHARGING FIREARM,2D,3681805,536653,01/01/2020 12:02:50 AM,...,01/01/2020 12:09:13 AM,01/01/2020 12:13:45 AM,GOA,GONE ON ARRIVAL,N,1A01,Kerlerec St & N Robertson St,70116,1,POINT (-90.0669267 29.96960271)
4,A0000520,94F,FIREWORKS,1A,94F,FIREWORKS,2J,3668697,542174,01/01/2020 12:03:46 AM,...,,01/01/2020 12:42:13 AM,NAT,Necessary Action Taken,N,3I01,053XX Memphis St,70124,3,POINT (-90.10813674 29.98517428)


In [74]:
# Select by label with .loc: ':' keeps every row, the list picks the columns
data.loc[:, ['Type', 'TypeText']]  # rows, then columns

Unnamed: 0,Type,TypeText
0,94F,FIREWORKS
1,21,COMPLAINT OTHER
2,94F,FIREWORKS
3,94,DISCHARGING FIREARM
4,94F,FIREWORKS
...,...,...
407205,22A,AREA CHECK
407206,103,DISTURBANCE (OTHER)
407207,22A,AREA CHECK
407208,67,THEFT


In [78]:
# Boolean indexing: keep only the rows where the condition is True
data[data['PoliceDistrict'] == 8]

Unnamed: 0,NOPD_Item,Type,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
9,A0001020,21,COMPLAINT OTHER,1J,21,COMPLAINT OTHER,1J,3681919,532231,01/01/2020 12:05:18 AM,...,,01/01/2020 12:05:46 AM,RTF,REPORT TO FOLLOW,N,8D05,005XX Blk Bourbon St,70112,8,POINT (-90.06672195 29.95743852)
28,K3306820,24,MEDICAL,2D,24,MEDICAL,2D,37369000,3513814,11/26/2020 02:50:03 AM,...,11/26/2020 02:50:20 AM,11/26/2020 03:31:48 AM,NAT,Necessary Action Taken,N,8C02,003XX Royal St,70130,8,POINT (0 0)
51,I0334420,24,MEDICAL,2D,21,COMPLAINT OTHER,1J,37369000,3513814,09/03/2020 11:44:06 AM,...,09/03/2020 12:01:53 PM,09/03/2020 12:13:14 PM,NAT,Necessary Action Taken,N,8I02,004XX Loyola Ave,70112,8,POINT (0 0)
53,I0335820,67,THEFT,0A,67,THEFT,1G,3680835,530868,09/03/2020 11:54:18 AM,...,09/03/2020 11:08:41 PM,09/03/2020 11:30:42 PM,GOA,GONE ON ARRIVAL,N,8I01,001XX Carondelet St,70112,8,POINT (-90.07019246 29.9537258)
72,E2995920,24,MEDICAL,2D,24,MEDICAL,2D,37369000,3513814,05/24/2020 07:39:39 PM,...,05/24/2020 07:39:39 PM,05/24/2020 07:56:31 PM,NAT,Necessary Action Taken,Y,8I02,Gravier St & Loyola Ave,70112,8,POINT (0 0)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407186,L0617320,21N,NOISE COMPLAINT,1G,21N,NOISE COMPLAINT,1G,3682758,525586,12/05/2020 11:00:01 PM,...,12/05/2020 11:36:34 PM,12/05/2020 11:36:51 PM,NAT,Necessary Action Taken,N,6C01,Calliope St & Convention Center Blvd,70130,8,POINT (-90.06430366 29.93914302)
407198,L0620420,911,SILENT 911 CALL,2F,911,SILENT 911 CALL,2F,3681681,531990,12/05/2020 11:30:33 PM,...,12/05/2020 11:33:09 PM,12/05/2020 11:38:40 PM,NAT,Necessary Action Taken,N,8D05,004XX Bourbon St,70112,8,POINT (-90.0674803 29.95678366)
407202,L0621220,911,SILENT 911 CALL,2F,911,SILENT 911 CALL,2F,3681686,531976,12/05/2020 11:41:23 PM,...,,12/05/2020 11:48:11 PM,NAT,Necessary Action Taken,N,8D05,004XX Bourbon St,70112,8,POINT (-90.06746523 29.95674521)
407204,L0621920,62A,"BURGLAR ALARM, SILENT",2E,62A,"BURGLAR ALARM, SILENT",2E,3681725,531330,12/05/2020 11:50:11 PM,...,12/05/2020 11:54:58 PM,12/05/2020 11:57:26 PM,NAT,Necessary Action Taken,N,8C02,003XX Royal St,70130,8,POINT (-90.06736533 29.95496856)


### Data Cleaning

- Column DataTypes
- Missing Values
- Replacing Values
- Duplicate Values
- Categorical Variables
- Unit Conversion

### Data Manipulation

- Merging DataFrames
- Indexes
- Grouping
- Pivoting
- Melting
- Stacking
- Unstacking
- Filtering

## Data Visualization

- Pseudocolor Plots
- Scatterplots
- Distributions/Regressions
- ECDF
- Bar Plot
- Histogram
- Time-series Plot

## Inferential Statistics

- Summary Statistics (Mean, Median, Mode)
- Percentiles, outliers
- Variance, standard deviation
- Covariance, Pearson Correlation Coefficient
- Binomial Distribution
- Poisson Distribution
- PDF
- CDF
- Linear Regression
- Confidence Intervals
- Hypothesis Testing
- Statistical Power