In [1]:
import pandas as pd

In [2]:
# Relative path to the street-name CSV; read into a DataFrame by pd.read_csv(file)
file = "Resources/baton_streets.csv"

In [3]:
# Load the CSV located at `file` into a DataFrame, then preview the first rows
original_df = pd.read_csv(filepath_or_buffer=file)
original_df.head(n=5)

Unnamed: 0,STREET NAME ID,STREET NAME,STREET FULL NAME,POSTAL COMMUNITY,MUNICIPAL COMMUNITY
0,1400342,PRIVATE STREET,PRIVATE STREET,BATON ROUGE,BATON ROUGE
1,1,4TH,N 4TH ST,BATON ROUGE,BATON ROUGE
2,10,11TH,S 11TH ST,BATON ROUGE,BATON ROUGE
3,100,ADDINGTON,ADDINGTON AVE,BATON ROUGE,BATON ROUGE
4,1000,CHALFONT,W CHALFONT DR,BATON ROUGE,PARISH


In [4]:
# Re-key the frame by "STREET NAME". set_index returns a new DataFrame,
# so original_df keeps its default integer index.
df = original_df.set_index(keys="STREET NAME")
df.head(n=5)

Unnamed: 0_level_0,STREET NAME ID,STREET FULL NAME,POSTAL COMMUNITY,MUNICIPAL COMMUNITY
STREET NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PRIVATE STREET,1400342,PRIVATE STREET,BATON ROUGE,BATON ROUGE
4TH,1,N 4TH ST,BATON ROUGE,BATON ROUGE
11TH,10,S 11TH ST,BATON ROUGE,BATON ROUGE
ADDINGTON,100,ADDINGTON AVE,BATON ROUGE,BATON ROUGE
CHALFONT,1000,W CHALFONT DR,BATON ROUGE,PARISH


In [5]:
# Read one cell two ways.
# loc[] addresses it by labels: row "ADDINGTON", column "STREET FULL NAME"
addington_name = df.loc["ADDINGTON", "STREET FULL NAME"]
loc_message = "Using Loc: " + addington_name
print(loc_message)

# iloc[] addresses it by integer position: row 3, column 1 (both zero-based)
also_addington_name = df.iloc[3, 1]
iloc_message = "Using Iloc: " + also_addington_name
print(iloc_message)

Using Loc: ADDINGTON AVE
Using Iloc: ADDINGTON AVE


In [6]:
# Look up five street labels, restricted to the columns from "STREET NAME ID"
# to "POSTAL COMMUNITY".
# "STREET NAME" is not a unique index, so a list of labels passed to loc[]
# returns EVERY row carrying one of those labels -- many more than five rows.
# (NOTE: a label *slice* such as "PRIVATE STREET":"CHALFONT" is expected to
# raise a KeyError on a non-unique, unsorted index -- confirm against the
# pandas version in use.)
street_labels = ["PRIVATE STREET", "4TH", "11TH", "ADDINGTON", "CHALFONT"]
wanted_columns = ["STREET NAME ID", "STREET FULL NAME", "POSTAL COMMUNITY"]
private_to_chalfont = df.loc[street_labels, wanted_columns]
print(private_to_chalfont)

print()

# iloc[] selects by integer position, and positions are always unique,
# so this is the first five rows and the first three columns only
also_private_to_chalfont = df.iloc[:5, :3]
print(also_private_to_chalfont)

                STREET NAME ID STREET FULL NAME POSTAL COMMUNITY
STREET NAME                                                     
PRIVATE STREET         1400342   PRIVATE STREET      BATON ROUGE
PRIVATE STREET         1400001   PRIVATE STREET      BATON ROUGE
PRIVATE STREET         1400015   PRIVATE STREET      BATON ROUGE
PRIVATE STREET         1400161   PRIVATE STREET      BATON ROUGE
PRIVATE STREET         1400343   PRIVATE STREET      BATON ROUGE
...                        ...              ...              ...
11TH                         9        N 11TH ST      BATON ROUGE
ADDINGTON                  100    ADDINGTON AVE      BATON ROUGE
CHALFONT                  1000    W CHALFONT DR      BATON ROUGE
CHALFONT                   998    N CHALFONT DR      BATON ROUGE
CHALFONT                   999    S CHALFONT DR      BATON ROUGE

[329 rows x 3 columns]

                STREET NAME ID STREET FULL NAME POSTAL COMMUNITY
STREET NAME                                                     


In [7]:
# A bare ":" in the row position keeps every row; the list in the column
# position keeps only `STREET FULL NAME` and `POSTAL COMMUNITY`
name_and_postal_columns = ["STREET FULL NAME", "POSTAL COMMUNITY"]
df.loc[:, name_and_postal_columns].head()

Unnamed: 0_level_0,STREET FULL NAME,POSTAL COMMUNITY
STREET NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
PRIVATE STREET,PRIVATE STREET,BATON ROUGE
4TH,N 4TH ST,BATON ROUGE
11TH,S 11TH ST,BATON ROUGE
ADDINGTON,ADDINGTON AVE,BATON ROUGE
CHALFONT,W CHALFONT DR,BATON ROUGE


In [8]:
# Element-wise comparison against "PARISH": the result is a Series of
# boolean values, one per row of df
municipal_parish = df["MUNICIPAL COMMUNITY"].eq("PARISH")
municipal_parish.head()

STREET NAME
PRIVATE STREET    False
4TH               False
11TH              False
ADDINGTON         False
CHALFONT           True
Name: MUNICIPAL COMMUNITY, dtype: bool

In [9]:
# loc[] also accepts a conditional (boolean) Series to filter rows:
# only the rows where the condition is True are returned
is_prairieville = df["POSTAL COMMUNITY"] == "PRAIRIEVILLE"
only_prairieville = df.loc[is_prairieville, :]
print(only_prairieville)

print()

# Conditions can be combined to narrow down or widen the filter;
# "|" (or) keeps rows that satisfy either condition
is_jackson = df["POSTAL COMMUNITY"] == "JACKSON"
only_prairieville_and_jackson = df.loc[is_prairieville | is_jackson, :]
print(only_prairieville_and_jackson)

                 STREET NAME ID    STREET FULL NAME POSTAL COMMUNITY  \
STREET NAME                                                            
ALLIGATOR BAYOU           16497  ALLIGATOR BAYOU RD     PRAIRIEVILLE   
BLUFF                     16498            BLUFF RD     PRAIRIEVILLE   

                MUNICIPAL COMMUNITY  
STREET NAME                          
ALLIGATOR BAYOU              PARISH  
BLUFF                        PARISH  

                  STREET NAME ID      STREET FULL NAME POSTAL COMMUNITY  \
STREET NAME                                                               
TALMADGE                    4772           TALMADGE DR          JACKSON   
TREAKLE                     4911            TREAKLE DR          JACKSON   
DENNIS                      1452             DENNIS CT          JACKSON   
ALLIGATOR BAYOU            16497    ALLIGATOR BAYOU RD     PRAIRIEVILLE   
BLUFF                      16498              BLUFF RD     PRAIRIEVILLE   
RENEE                       4072 