## Dictionaries and Dataframes

## Dictionaries

> {key.obj}

In [1]:
dct = {"run":"To move swiftly","walk":"To move slowly, liesurely by foot"}
dct

{'run': 'To move swiftly', 'walk': 'To move slowly, liesurely by foot'}

### Dictionary Methods

In [2]:
dct.keys()

dict_keys(['run', 'walk'])

In [3]:
dct.values()

dict_values(['To move swiftly', 'To move slowly, liesurely by foot'])

In [4]:
dct.items()
#pulls in tuples

dict_items([('run', 'To move swiftly'), ('walk', 'To move slowly, liesurely by foot')])

## Calling value linked to a particular key

In [5]:
# think of a dictionary as a group of tuples

#calling dictionary values

dct["run"]

'To move swiftly'

In [6]:
dct["walk"]

'To move slowly, liesurely by foot'

In [7]:
newdct = {4:"the inability to walk", 4.1:"the ability to sing"}
newdct

{4: 'the inability to walk', 4.1: 'the ability to sing'}

In [8]:
newdct[4]

'the inability to walk'

In [9]:
newdct[4.1]

'the ability to sing'

## Defining values on the fly

In [10]:
dct["fly"] = "to move through the air"
dct

{'run': 'To move swiftly',
 'walk': 'To move slowly, liesurely by foot',
 'fly': 'to move through the air'}

# create equation using a dictionary

In [11]:
num_dct = {}
a = 3
b = 2
c = 10
for i in range(-10, 11):
    num_dct[i] = a * i**2 + b * i + c
    
num_dct
    

{-10: 290,
 -9: 235,
 -8: 186,
 -7: 143,
 -6: 106,
 -5: 75,
 -4: 50,
 -3: 31,
 -2: 18,
 -1: 11,
 0: 10,
 1: 15,
 2: 26,
 3: 43,
 4: 66,
 5: 95,
 6: 130,
 7: 171,
 8: 218,
 9: 271,
 10: 330}

## nesting a dictionary - creating more than one layer

In [12]:
dct = {
    "run":
          {"verb":"to move swiftly by foot",
          "noun": "refers to a period of time while one was running"},
    "walk":
          {"verb": "to move slowly, leisurely by foot",
          "noun": "refers to a period of time while one was walking"}}

dct

{'run': {'verb': 'to move swiftly by foot',
  'noun': 'refers to a period of time while one was running'},
 'walk': {'verb': 'to move slowly, leisurely by foot',
  'noun': 'refers to a period of time while one was walking'}}

In [13]:
# this syntax allows for a change on the fly for a new definition. 
# this example is two layers deep, but you can go farther than two
dct["run"]["verb"] = "to swiftly move by foot"
    
dct

{'run': {'verb': 'to swiftly move by foot',
  'noun': 'refers to a period of time while one was running'},
 'walk': {'verb': 'to move slowly, leisurely by foot',
  'noun': 'refers to a period of time while one was walking'}}

In [14]:
dct["fly"] = {"verb":"to move through air", 
              "noun":"that annoying bug that attacks you"}

dct

{'run': {'verb': 'to swiftly move by foot',
  'noun': 'refers to a period of time while one was running'},
 'walk': {'verb': 'to move slowly, leisurely by foot',
  'noun': 'refers to a period of time while one was walking'},
 'fly': {'verb': 'to move through air',
  'noun': 'that annoying bug that attacks you'}}

# transform a dictionary in to a dataframe

In [15]:
import pandas as pd
pd.DataFrame(dct)

Unnamed: 0,run,walk,fly
verb,to swiftly move by foot,"to move slowly, leisurely by foot",to move through air
noun,refers to a period of time while one was running,refers to a period of time while one was walking,that annoying bug that attacks you


In [16]:
# .T allows transposed dataframe
pd.DataFrame(dct).T

Unnamed: 0,verb,noun
run,to swiftly move by foot,refers to a period of time while one was running
walk,"to move slowly, leisurely by foot",refers to a period of time while one was walking
fly,to move through air,that annoying bug that attacks you


In [29]:
min_val = 0
max_val = 20
product_table = {}

for i in range(min_val, max_val + 1):
    product_table[i] = {}
    for j in range(min_val,max_val + 1):
        product_table[i][j] = i * j
    
product_table

{0: {0: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  15: 0,
  16: 0,
  17: 0,
  18: 0,
  19: 0,
  20: 0},
 1: {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  9: 9,
  10: 10,
  11: 11,
  12: 12,
  13: 13,
  14: 14,
  15: 15,
  16: 16,
  17: 17,
  18: 18,
  19: 19,
  20: 20},
 2: {0: 0,
  1: 2,
  2: 4,
  3: 6,
  4: 8,
  5: 10,
  6: 12,
  7: 14,
  8: 16,
  9: 18,
  10: 20,
  11: 22,
  12: 24,
  13: 26,
  14: 28,
  15: 30,
  16: 32,
  17: 34,
  18: 36,
  19: 38,
  20: 40},
 3: {0: 0,
  1: 3,
  2: 6,
  3: 9,
  4: 12,
  5: 15,
  6: 18,
  7: 21,
  8: 24,
  9: 27,
  10: 30,
  11: 33,
  12: 36,
  13: 39,
  14: 42,
  15: 45,
  16: 48,
  17: 51,
  18: 54,
  19: 57,
  20: 60},
 4: {0: 0,
  1: 4,
  2: 8,
  3: 12,
  4: 16,
  5: 20,
  6: 24,
  7: 28,
  8: 32,
  9: 36,
  10: 40,
  11: 44,
  12: 48,
  13: 52,
  14: 56,
  15: 60,
  16: 64,
  17: 68,
  18: 72,
  19: 76,
  20: 80},
 5: {0: 0,
  1: 5,
  2:

In [30]:
print(product_table[4][20])

80


In [18]:
pd.DataFrame(product_table)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
2,0,2,4,6,8,10,12,14,16,18,...,22,24,26,28,30,32,34,36,38,40
3,0,3,6,9,12,15,18,21,24,27,...,33,36,39,42,45,48,51,54,57,60
4,0,4,8,12,16,20,24,28,32,36,...,44,48,52,56,60,64,68,72,76,80
5,0,5,10,15,20,25,30,35,40,45,...,55,60,65,70,75,80,85,90,95,100
6,0,6,12,18,24,30,36,42,48,54,...,66,72,78,84,90,96,102,108,114,120
7,0,7,14,21,28,35,42,49,56,63,...,77,84,91,98,105,112,119,126,133,140
8,0,8,16,24,32,40,48,56,64,72,...,88,96,104,112,120,128,136,144,152,160
9,0,9,18,27,36,45,54,63,72,81,...,99,108,117,126,135,144,153,162,171,180


In [19]:
#APIs are typically arranged by dictionary. This is the fundamental
#structure that the data is saved and gives a way to apply data usage correctly

In [20]:
##build a dictionary with faculty information
    #will build first few rows
    
faculty_dict = {
                "Bangsund, Dean":{"Position":"Research Scientist",
                                 "Email":"d.bangsund@ndsu.edu",
                                 "Phone":"701-231-7471"},
               "Biermacher, Jon":{"Position":"Extension Livestock Development",
                                 "Email":"jon.biermacher@ndsu.edu",
                                 "Phone":"701-231-7379"},
}
faculty_dict


{'Bangsund, Dean': {'Position': 'Research Scientist',
  'Email': 'd.bangsund@ndsu.edu',
  'Phone': '701-231-7471'},
 'Biermacher, Jon': {'Position': 'Extension Livestock Development',
  'Email': 'jon.biermacher@ndsu.edu',
  'Phone': '701-231-7379'}}

In [21]:
pd.DataFrame(faculty_dict).T

Unnamed: 0,Position,Email,Phone
"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,701-231-7471
"Biermacher, Jon",Extension Livestock Development,jon.biermacher@ndsu.edu,701-231-7379


In [22]:
#this table format will paste into excel already arranged this way. 

In [23]:
faculty_df = pd.read_csv("NDSUFaculty.csv")
faculty_df

Unnamed: 0,Name,Position,Email,Phone
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,Phone
1,,,,701-231-7471
2,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,Phone
3,,,,701-231-7379
4,"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu,Phone
...,...,...,...,...
65,,,,701-231-7443
66,"Wachenheim, Cheryl",Professor,Cheryl.Wachenheim@ndsu.edu,Phone
67,,,,701-231-7452
68,"Wilson, William",Distinguished Professor,William.Wilson@ndsu.edu,Phone


In [24]:
### Divide the dataframe between Phone Numbers and All Renaing columns

In [25]:
# copy the "Phone" column
phone_numbers = faculty_df[["Phone"]]
phone_numbers

Unnamed: 0,Phone
0,Phone
1,701-231-7471
2,Phone
3,701-231-7379
4,Phone
...,...
65,701-231-7443
66,Phone
67,701-231-7452
68,Phone


In [26]:
del faculty_df["Phone"]

faculty_df = faculty_df.dropna()

faculty_df

Unnamed: 0,Name,Position,Email
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu
2,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu
4,"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu
6,"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu
8,"Caton, James",Assistant Professor,james.caton@ndsu.edu
10,"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu
12,"Englund, David",Senior Lecturer,david.englund@ndsu.edu
14,"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu
16,"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu
18,"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu


In [31]:
# We can see that a phone number appears in row 1 then 
# reappears every 3 rows after that
# use iloc to identify observations subject to this pattern

phone_numbers.iloc[1::2]

Unnamed: 0,Phone
1,701-231-7471
3,701-231-7379
5,701-231-8672
7,701-231-7442
9,701-231-7337
11,701-231-9797
13,701-231-6641
15,701-231-7393
17,701-231-5747
19,701-231-8103


In [33]:
# place phone_number values in a column names Phone in faculty_df
faculty_df["Phone"] = phone_numbers["Phone"][1::2].values
faculty_df = faculty_df.set_index("Name")
faculty_df.to_csv("FacultyDF.csv")
faculty_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  faculty_df["Phone"] = phone_numbers["Phone"][1::2].values


Unnamed: 0_level_0,Position,Email,Phone
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,701-231-7471
"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,701-231-7379
"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu,701-231-8672
"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu,701-231-7442
"Caton, James",Assistant Professor,james.caton@ndsu.edu,701-231-7337
"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu,701-231-9797
"Englund, David",Senior Lecturer,david.englund@ndsu.edu,701-231-6641
"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu,701-231-7393
"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu,701-231-5747
"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu,701-231-8103
