# Dictionaries and Dataframes
## Dictionaries

In [6]:
# Dictionaries are structures that link keys to values.
# key value pair : {key:val}
dct = {"run": "to move swiftly by foot",
      "walk": "to move slowly, leisurely by foot"}
dct

{'run': 'to move swiftly by foot', 'walk': 'to move slowly, leisurely by foot'}

### Dictionary Calling and Methods

In [7]:
dct["run"]

'to move swiftly by foot'

In [8]:
dct["walk"]

'to move slowly, leisurely by foot'

In [9]:
dct.keys()

dict_keys(['run', 'walk'])

In [10]:
dct.values()

dict_values(['to move swiftly by foot', 'to move slowly, leisurely by foot'])

In [11]:
dct.items()

dict_items([('run', 'to move swiftly by foot'), ('walk', 'to move slowly, leisurely by foot')])

### Defining values on the fly

In [12]:
dct["fly"] = "to move through the air"
dct

{'run': 'to move swiftly by foot',
 'walk': 'to move slowly, leisurely by foot',
 'fly': 'to move through the air'}

In [13]:
# create a function and find its points using a dictionary
num_dct = {}
a = 3
b = 2
c = 10
for i in range(-10, 11):
    num_dct[i] = a * (i ** 2) + b * i + c
num_dct

{-10: 290,
 -9: 235,
 -8: 186,
 -7: 143,
 -6: 106,
 -5: 75,
 -4: 50,
 -3: 31,
 -2: 18,
 -1: 11,
 0: 10,
 1: 15,
 2: 26,
 3: 43,
 4: 66,
 5: 95,
 6: 130,
 7: 171,
 8: 218,
 9: 271,
 10: 330}

### Multi-Layer Dictionary

In [14]:
mdct = {"run":{
            "verb":"",
            "noun":""},
        "walk":{
            "verb":"",
            "noun":""}}
mdct

{'run': {'verb': '', 'noun': ''}, 'walk': {'verb': '', 'noun': ''}}

In [15]:
mdct["run"]["verb"] = "to move swiftly by foot"
mdct["run"]["noun"] = "a period in time during which one was running"
mdct["walk"]["verb"] = "to move slowly by foot"
mdct["walk"]["noun"] = "a period in time during which one was walking"

mdct

{'run': {'verb': 'to move swiftly by foot',
  'noun': 'a period in time during which one was running'},
 'walk': {'verb': 'to move slowly by foot',
  'noun': 'a period in time during which one was walking'}}

In [16]:
mdct["fly"] = {"verb":"", "noun":""}
mdct["fly"]["verb"] = "to move through the air"
mdct["fly"]["noun"] = "a buzzing insect often seen around rotten meat"
mdct

{'run': {'verb': 'to move swiftly by foot',
  'noun': 'a period in time during which one was running'},
 'walk': {'verb': 'to move slowly by foot',
  'noun': 'a period in time during which one was walking'},
 'fly': {'verb': 'to move through the air',
  'noun': 'a buzzing insect often seen around rotten meat'}}

## Transforming a dictionary to a dataframe

In [17]:
import pandas as pd

In [18]:
pd.DataFrame(mdct)

Unnamed: 0,run,walk,fly
verb,to move swiftly by foot,to move slowly by foot,to move through the air
noun,a period in time during which one was running,a period in time during which one was walking,a buzzing insect often seen around rotten meat


In [19]:
pd.DataFrame(mdct).T

Unnamed: 0,verb,noun
run,to move swiftly by foot,a period in time during which one was running
walk,to move slowly by foot,a period in time during which one was walking
fly,to move through the air,a buzzing insect often seen around rotten meat


In [20]:
# Creating a product table
min_val = 0
max_val = 15

product_table = {}

for i in range(min_val, max_val + 1):
    product_table[i] = {}
    for j in range(min_val, max_val + 1):
        product_table[i][j] = i * j
pd.DataFrame(product_table)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
2,0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
3,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45
4,0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
5,0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75
6,0,6,12,18,24,30,36,42,48,54,60,66,72,78,84,90
7,0,7,14,21,28,35,42,49,56,63,70,77,84,91,98,105
8,0,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120
9,0,9,18,27,36,45,54,63,72,81,90,99,108,117,126,135


In [21]:
#build a dict with faculty info
    #build the first few rows
faculty_dict = {"Bangsund, Dean": {"Position":"Research Scientist", 
                                    "Email":"d.bangsun@ndsu.edu", 
                                   "Phone":"701-231-7471"},
                "Biermacher, Jon": {"Position":"Extension Livestock Development Specialist", 
                                    "Email":"jon.biermacher@ndsu.edu", 
                                    "Phone":"701-231-7379"}}
pd.DataFrame(faculty_dict).T

Unnamed: 0,Position,Email,Phone
"Bangsund, Dean",Research Scientist,d.bangsun@ndsu.edu,701-231-7471
"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,701-231-7379


In [22]:
faculty_df = pd.read_csv("Faculty.csv")
faculty_df

Unnamed: 0,Name,Position,Email,Phone
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,Phone
1,,,,701-231-7471
2,,,,
3,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,Phone
4,,,,701-231-7379
...,...,...,...,...
100,,,,701-231-7452
101,,,,
102,"Wilson, William",Distinguished Professor,William.Wilson@ndsu.edu,Phone
103,,,,701-231-7472


In [23]:
phone_numbers = faculty_df[["Phone"]]
del faculty_df["Phone"]
faculty_df = faculty_df.dropna()

In [24]:
faculty_df

Unnamed: 0,Name,Position,Email
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu
3,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu
6,"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu
9,"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu
12,"Caton, James",Assistant Professor,james.caton@ndsu.edu
15,"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu
18,"Englund, David",Senior Lecturer,david.englund@ndsu.edu
21,"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu
24,"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu
27,"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu


In [25]:
#We can see a phone number appears in row 1 and reappears every three rows after
phone_numbers

Unnamed: 0,Phone
0,Phone
1,701-231-7471
2,
3,Phone
4,701-231-7379
...,...
100,701-231-7452
101,
102,Phone
103,701-231-7472


In [26]:
#use i loc to ident obs subject to this pattern
phone_numbers.iloc[1::3]

Unnamed: 0,Phone
1,701-231-7471
4,701-231-7379
7,701-231-8672
10,701-231-7442
13,701-231-7337
16,701-231-9797
19,701-231-6641
22,701-231-7393
25,701-231-5747
28,701-231-8103


In [27]:
faculty_df["Phone"] = phone_numbers["Phone"].iloc[1::3].values
faculty_df.to_csv("FacultyDF.csv")
faculty_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  faculty_df["Phone"] = phone_numbers["Phone"].iloc[1::3].values


Unnamed: 0,Name,Position,Email,Phone
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,701-231-7471
3,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,701-231-7379
6,"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu,701-231-8672
9,"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu,701-231-7442
12,"Caton, James",Assistant Professor,james.caton@ndsu.edu,701-231-7337
15,"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu,701-231-9797
18,"Englund, David",Senior Lecturer,david.englund@ndsu.edu,701-231-6641
21,"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu,701-231-7393
24,"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu,701-231-5747
27,"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu,701-231-8103
