# Dictionaries and Dataframes

## Dictionaries

Dictionary structure:

> {key:obj}

In [1]:
dct = {"run":"to move swiftly",
      "walk":"to move slowly, liesurely by foot"}
dct

{'run': 'to move swiftly', 'walk': 'to move slowly, liesurely by foot'}

### dictionary methods

In [2]:
dct.keys()

dict_keys(['run', 'walk'])

In [3]:
dct.values()

dict_values(['to move swiftly', 'to move slowly, liesurely by foot'])

In [4]:
dct.items()

dict_items([('run', 'to move swiftly'), ('walk', 'to move slowly, liesurely by foot')])

## calling value linked to a particular key

In [5]:
# calling dictionary values
dct["run"]

'to move swiftly'

In [6]:
dct["walk"]

'to move slowly, liesurely by foot'

### Defining values on the fly

In [7]:
dct["fly"] = "to move through the air"
dct

{'run': 'to move swiftly',
 'walk': 'to move slowly, liesurely by foot',
 'fly': 'to move through the air'}

In [8]:
dct["fly"]

'to move through the air'

## create equation using a dictionary

In [9]:
num_dct = {}
a = 3
b = 2
c =10
for i in range(-10, 11):
    num_dct[i] = a * i**2 + b * i + c

num_dct

{-10: 290,
 -9: 235,
 -8: 186,
 -7: 143,
 -6: 106,
 -5: 75,
 -4: 50,
 -3: 31,
 -2: 18,
 -1: 11,
 0: 10,
 1: 15,
 2: 26,
 3: 43,
 4: 66,
 5: 95,
 6: 130,
 7: 171,
 8: 218,
 9: 271,
 10: 330}

## Nesting a dictionary - creating more than one layer


In [10]:
dct = {
    "run":{
        "verb":"",
        "noun":""},
    "walk":{
        "verb":"",
        "noun":""}}
dct

{'run': {'verb': '', 'noun': ''}, 'walk': {'verb': '', 'noun': ''}}

In [11]:
dct["run"]["verb"] = "to move swiftly by foot"
dct["run"]["noun"] = "refers to a period of time while one was running"
dct["walk"]["verb"] = "to move slowly, liesurely by foot"
dct["walk"]["noun"] = "refers to a period of time while one was walking"
dct

{'run': {'verb': 'to move swiftly by foot',
  'noun': 'refers to a period of time while one was running'},
 'walk': {'verb': 'to move slowly, liesurely by foot',
  'noun': 'refers to a period of time while one was walking'}}

In [12]:
dct["fly"] = {"verb":"to move through air",
             "noun":"that buzzing bug that sometimes flies around your house"}
dct

{'run': {'verb': 'to move swiftly by foot',
  'noun': 'refers to a period of time while one was running'},
 'walk': {'verb': 'to move slowly, liesurely by foot',
  'noun': 'refers to a period of time while one was walking'},
 'fly': {'verb': 'to move through air',
  'noun': 'that buzzing bug that sometimes flies around your house'}}

## Transform a dictionary to a dataframe

In [13]:
import pandas as pd
pd.DataFrame(dct)

Unnamed: 0,run,walk,fly
verb,to move swiftly by foot,"to move slowly, liesurely by foot",to move through air
noun,refers to a period of time while one was running,refers to a period of time while one was walking,that buzzing bug that sometimes flies around y...


In [14]:
# df.T transposes the dataframe
pd.DataFrame(dct).T

Unnamed: 0,verb,noun
run,to move swiftly by foot,refers to a period of time while one was running
walk,"to move slowly, liesurely by foot",refers to a period of time while one was walking
fly,to move through air,that buzzing bug that sometimes flies around y...


## Creating a table of products by factor

In [15]:
min_val = 0
max_val = 100
product_table = {}
for i in range(min_val, max_val + 1):
    product_table[i] = {}
    for j in range(min_val, max_val +1):
        product_table[i][j] = i * j
    
product_table

{0: {0: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  15: 0,
  16: 0,
  17: 0,
  18: 0,
  19: 0,
  20: 0,
  21: 0,
  22: 0,
  23: 0,
  24: 0,
  25: 0,
  26: 0,
  27: 0,
  28: 0,
  29: 0,
  30: 0,
  31: 0,
  32: 0,
  33: 0,
  34: 0,
  35: 0,
  36: 0,
  37: 0,
  38: 0,
  39: 0,
  40: 0,
  41: 0,
  42: 0,
  43: 0,
  44: 0,
  45: 0,
  46: 0,
  47: 0,
  48: 0,
  49: 0,
  50: 0,
  51: 0,
  52: 0,
  53: 0,
  54: 0,
  55: 0,
  56: 0,
  57: 0,
  58: 0,
  59: 0,
  60: 0,
  61: 0,
  62: 0,
  63: 0,
  64: 0,
  65: 0,
  66: 0,
  67: 0,
  68: 0,
  69: 0,
  70: 0,
  71: 0,
  72: 0,
  73: 0,
  74: 0,
  75: 0,
  76: 0,
  77: 0,
  78: 0,
  79: 0,
  80: 0,
  81: 0,
  82: 0,
  83: 0,
  84: 0,
  85: 0,
  86: 0,
  87: 0,
  88: 0,
  89: 0,
  90: 0,
  91: 0,
  92: 0,
  93: 0,
  94: 0,
  95: 0,
  96: 0,
  97: 0,
  98: 0,
  99: 0,
  100: 0},
 1: {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  9: 9,
  10: 10,
  1

In [16]:
pd.DataFrame(product_table)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
2,0,2,4,6,8,10,12,14,16,18,...,182,184,186,188,190,192,194,196,198,200
3,0,3,6,9,12,15,18,21,24,27,...,273,276,279,282,285,288,291,294,297,300
4,0,4,8,12,16,20,24,28,32,36,...,364,368,372,376,380,384,388,392,396,400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,0,96,192,288,384,480,576,672,768,864,...,8736,8832,8928,9024,9120,9216,9312,9408,9504,9600
97,0,97,194,291,388,485,582,679,776,873,...,8827,8924,9021,9118,9215,9312,9409,9506,9603,9700
98,0,98,196,294,392,490,588,686,784,882,...,8918,9016,9114,9212,9310,9408,9506,9604,9702,9800
99,0,99,198,297,396,495,594,693,792,891,...,9009,9108,9207,9306,9405,9504,9603,9702,9801,9900


In [17]:
##Build a dictionary with faculty information
 # will build the first few rows
    
faculty_dct = {
                "Bangsund, Dean":{"Position":"Research Scientist",
                                "Email":"d.bangsund@ndsu.edu",
                                "Phone":"701-231-7471"},
                "Biermacher, Jon":{"Position":"Extension Livestock Development Specialist",
                                "Email":"jon.biermacher@ndsu.edu",
                                "Phone":"701-231-7379"},
}

faculty_dct

{'Bangsund, Dean': {'Position': 'Research Scientist',
  'Email': 'd.bangsund@ndsu.edu',
  'Phone': '701-231-7471'},
 'Biermacher, Jon': {'Position': 'Extension Livestock Development Specialist',
  'Email': 'jon.biermacher@ndsu.edu',
  'Phone': '701-231-7379'}}

In [18]:
pd.DataFrame(faculty_dct).T

Unnamed: 0,Position,Email,Phone
"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,701-231-7471
"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,701-231-7379


In [19]:
faculty_df = pd.read_csv("NDSUFaculty.csv")
faculty_df

Unnamed: 0,Name,Position,Email,Phone
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,Phone
1,,,,701-231-7471
2,,,,
3,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,Phone
4,,,,701-231-7379
...,...,...,...,...
100,,,,701-231-7452
101,,,,
102,"Wilson, William",Distinguished Professor,William.Wilson@ndsu.edu,Phone
103,,,,701-231-7472


In [20]:
### Divide the dataframe between Phone Numbers and All Remaining Columns

In [21]:
# copy the "Phone" column
phone_numbers = faculty_df[["Phone"]]
phone_numbers

Unnamed: 0,Phone
0,Phone
1,701-231-7471
2,
3,Phone
4,701-231-7379
...,...
100,701-231-7452
101,
102,Phone
103,701-231-7472


In [22]:
# delete the phone column from original dataframe
del faculty_df["Phone"]
# drop rows with null values using df.dropna()
faculty_df = faculty_df.dropna()
faculty_df

Unnamed: 0,Name,Position,Email
0,"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu
3,"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu
6,"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu
9,"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu
12,"Caton, James",Assistant Professor,james.caton@ndsu.edu
15,"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu
18,"Englund, David",Senior Lecturer,david.englund@ndsu.edu
21,"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu
24,"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu
27,"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu


In [23]:
# We can see that a phone number appears in row 1, then
# reappears every 3 rows after that
# use iloc to identify observations subject to this pattern
#  leverage the structure that we used to call list values
phone_numbers.iloc[1::3]

Unnamed: 0,Phone
1,701-231-7471
4,701-231-7379
7,701-231-8672
10,701-231-7442
13,701-231-7337
16,701-231-9797
19,701-231-6641
22,701-231-7393
25,701-231-5747
28,701-231-8103


In [24]:
phone_numbers["Phone"][1::3].values

array(['701-231-7471', '701-231-7379', '701-231-8672', '701-231-7442',
       '701-231-7337', '701-231-9797', '701-231-6641', '701-231-7393',
       '701-231-5747', '701-231-8103', '701-231-6494', '701-231-7832',
       '701-231-5374', '701-730-0940', '701-231-6642', '701-231-1871',
       '701-231-8819', '701-231-8642', '701-231-8883', '701-231-9519',
       '701-231-7441', '701-231-8587', '701-231-7377', '701-231-8248',
       '701-231-7469', '701-231-5882', '701-231-7470', '701-231-5265',
       '701-231-9796', '701-231-6189', '701-231-7448', '701-231-7441',
       '701-231-7443', '701-231-7452', '701-231-7472'], dtype=object)

In [25]:
# place phone_number values in a column named Phone in faculty_df
faculty_df["Phone"] = phone_numbers["Phone"][1::3].values
faculty_df = faculty_df.set_index("Name")
faculty_df.to_csv("FacultyDF.csv")
faculty_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  faculty_df["Phone"] = phone_numbers["Phone"][1::3].values


Unnamed: 0_level_0,Position,Email,Phone
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bangsund, Dean",Research Scientist,d.bangsund@ndsu.edu,701-231-7471
"Biermacher, Jon",Extension Livestock Development Specialist,jon.biermacher@ndsu.edu,701-231-7379
"Bullock, David",Research Assoc Professor,david.w.bullock@ndsu.edu,701-231-8672
"Carney, Jennifer",Student Services Director,Jennifer.Carney@ndsu.edu,701-231-7442
"Caton, James",Assistant Professor,james.caton@ndsu.edu,701-231-7337
"Dean, James",Assistant Professor of Economics,james.dean.1@ndsu.edu,701-231-9797
"Englund, David",Senior Lecturer,david.englund@ndsu.edu,701-231-6641
"Haakenson, Paulann",Information Processing Specialist,paulann.haakenson@ndsu.edu,701-231-7393
"Hanson, Erik",Assistant Professor,erik.drevlow.hanson@ndsu.edu,701-231-5747
"Haugen, Ron",Farm Management Specialist,ronald.haugen@ndsu.edu,701-231-8103
