# DataStructures
Copyright © 2023 by Ernst Henle

## DataStructures (built-in, primitive, scalar)

In [None]:
int # e.g. 23, 0 and -17

In [None]:
30 + 10 # an integer operation

In [None]:
# type() reports the class of a value; the literal 40 is an int
type(40)

In [None]:
float # e.g. -0.001 and 1.23e15

In [None]:
1.8e2/4.5 # a floating point operation

In [None]:
# The decimal point makes 40.0 a float, even though its value is a whole number
type(40.0)

In [None]:
# The string 'nan' produces the special float value "not a number" (NaN)
float('nan')

In [None]:
# The string 'inf' produces the special float value positive infinity
float('inf')

In [None]:
# The string '-inf' produces the special float value negative infinity
float('-inf')

In [None]:
bool # i.e. True and False

In [None]:
1 == 0 # Boolean operation

In [None]:
# A comparison evaluates to a Boolean, so type() reports bool here
type(1 == 0)

In [None]:
# In arithmetic False counts as 0; adding it to a float gives the float 1.0
1.0 + False

In [None]:
# In arithmetic True counts as 1; subtracting it from the int 3 gives the int 2
3 - True

In [None]:
str # e.g. "hello", "?", and "1"

In [None]:
"lies, damn lies," + " and statistics" # String operation

In [None]:
# Single and double quotes both create strings; type() reports str
type('lies, damn lies, and statistics')

In [None]:
# Cast an int to a string; + then concatenates the two strings, giving '11'
str(1) + str(1)

In [None]:
# Cast a string to an int; + then adds the two integers, giving 2
int('1') + int('1')

## More on Data Types

In [None]:
# Create an integer
x = 7

# Determine the data type of x
type(x)

In [None]:
# Add 3 to x
x + 3

In [None]:
# Create a float
x = 7.0

# Determine the data type of x
type(x)

In [None]:
# Add 3 to x
x + 3.0

In [None]:
# Add an integer, a float, and a Boolean.
# The int and the bool (True counts as 1) are promoted to float: the result is 11.0
7 + 3.0 + True

In [None]:
# Create a string
x = "a"

# Determine the data type of x
type(x)

In [None]:
# Add b to x
x + "b"

In [None]:
# Try to add 3 to a string: str + int is not defined, so this raises a TypeError
x + 3

In [None]:
# Create a string
x = "7"

# Determine the data type of x
type(x)

In [None]:
# If we try to add 3 to x, we will get an error
x + 3

In [None]:
# Add "3" to x
x + "3"

## DataStructures (built-in, multi-dimensional)
Documentation on lists and other data structures:
<https://docs.python.org/3/tutorial/datastructures.html>

In [None]:
list

In [None]:
# A list is written with square brackets; its elements can have mixed types
# (here a str and an int)
City = ['USA', 704]
type(City)

In [None]:
# append() adds one element to the end of the list, changing the list in place
City.append('Seattle')
City

In [None]:
# Indexing is zero-based, so index 1 selects the second element (704)
City[1]

In [None]:
# Equal-length lists in a list create a rectangular structure:
# each inner list is one row, and each position within a row is one city
Name = ['Seattle', 'San Jose', 'San Jose', 'La Paz', 'La Paz']
Country = ['USA', 'USA', 'Costa Rica', 'Mexico', 'Bolivia']
Population = [704, 1030, 333, 265, 757]
Cities = [Name, Country, Population]
Cities

In [None]:
# The first index picks the inner list (2 -> Population); the second picks the
# city (3 -> the fourth entry, La Paz in Mexico), giving 265
Cities[2][3]

In [None]:
dict

In [None]:
# A dict maps keys to values; the values can have different types
# (here a list for 'USA' and plain strings for the other countries)
CityNamesByCountry = {'USA':['Seattle', 'San Jose'], 'Costa Rica':'San Jose', 'Mexico':'Oaxaca'}
type(CityNamesByCountry) # dict

In [None]:
CityNamesByCountry

In [None]:
# Look up the value stored under the key 'USA' (the list of two city names)
CityNamesByCountry['USA']

In [None]:
# Assigning to an existing key ('Mexico') replaces its value 'Oaxaca';
# assigning to a new key ('Bolivia') adds an entry to the dict
CityNamesByCountry['Mexico'] = 'La Paz'
CityNamesByCountry['Bolivia'] = 'La Paz'
CityNamesByCountry

In [None]:
# keys() gives the dict's keys; list() copies them into a list
list(CityNamesByCountry.keys())

In [None]:
# values() gives the dict's values; list() copies them into a list
list(CityNamesByCountry.values())

In [None]:
# items() gives (key, value) pairs; list() copies them into a list of tuples
list(CityNamesByCountry.items())

In [None]:
# The in operator on a dict tests its keys; 'CostaRica' (no space) is not a key,
# so this is False
'CostaRica' in CityNamesByCountry

In [None]:
# 'Costa Rica' (with the space) matches a key exactly, so this is True
'Costa Rica' in CityNamesByCountry

In [None]:
# To test the values instead of the keys, search values();
# 'LaPaz' (no space) matches no value, so this is False
'LaPaz' in CityNamesByCountry.values()

In [None]:
# 'La Paz' is among the values once the 'Mexico' and 'Bolivia' assignments
# have run, so this is True
'La Paz' in CityNamesByCountry.values()

In [None]:
# The inverse mapping: city name -> country, or a list of countries when
# several cities share the same name
CountryByCityNames = {'Seattle':'USA', 'San Jose':['USA', 'Costa Rica'], 'La Paz':['Bolivia', 'Mexico']}
CountryByCityNames

In [None]:
# Display Cities again for comparison: its Name and Country lists hold the
# same city/country pairs as CountryByCityNames (plus the populations)
Cities

## DataStructures (imported, multi-dimensional)

In [None]:
import numpy as np # make numpy package usable
import pandas as pd # make pandas package usable

In [None]:
# Create an array of integers
x = np.array([5, -7, 1, 1, 99])

# Determine the data type of x
type(x)

In [None]:
# Find out the data type of the elements in the array
# (an integer dtype; the exact width, e.g. int64, depends on the platform)
x.dtype.name

In [None]:
# We can add 3 to this array of integers; the addition is applied to every element
x + 3

In [None]:
# Create an array of strings
x = np.array(["abc", "", " ", "?", "7"])

In [None]:
# Determine the data type of x
type(x)

In [None]:
# Find out the data type for the elements in the array
x.dtype.name

In [None]:
# If we try to add 3 to this array of strings we will get a TypeError.
x + 3

In [None]:
# Create an array of ages from a plain Python list
ageList = [9, 9, 10, 8, 12, 10, 7, 8, 10, 8, 0, 9, 7, 9, 10, 9, 6, 9, 9, 11]
ages = np.array(ageList)
# Standard deviation of the ages
# NOTE(review): ndarray.std() defaults to ddof=0 (population std) per the NumPy
# docs -- confirm that this, not the sample std, is the intended statistic
ages.std()

In [None]:
# Cannot assign a string to a numeric array:
# 'a' cannot be converted to an integer, so this raises a ValueError
ages[0] = 'a'

In [None]:
# Create a histogram of the ages
# (only the data are passed, so hist() runs with its default settings)
import matplotlib.pyplot
matplotlib.pyplot.hist(ages)

In [None]:
# Create an empty data frame (no rows, no columns yet) and display it
Students = pd.DataFrame()
Students

In [None]:
# Add columns to the data frame (table); each assignment creates one column.
# Both columns hold 20 values, one per student
Students['ages'] = ages
Students['Grade'] = [4, 3, 4, 3, 6, 5, 2, 3, 5, 3, 1, 4, 2, 5, 6, 4, 1, 4, 2, 6]
Students

In [None]:
# Determine the correlation coefficient between the two columns;
# corr() returns a table of the pairwise correlations of the columns
Students.corr()