# Variable Types and Arrays #

In this notebook we will learn:
    
- About some of the different variable types
- How to convert between variable types (where possible)
- The basics of arrays
- Some array functions
- A little more about tables
- How to use the **arange** function to make an array

We will also see a few more graphs, some made from arrays and some made from tables.  


In [None]:
from datascience import *
import numpy as np

from datascience import *
import numpy as np
path_data = '../../../../../data/'
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

from urllib.request import urlopen 
import re
def read_url(url):
    """Return the text found at `url` with whitespace normalised.

    The response body is decoded with the default codec of `bytes.decode()`
    (UTF-8) and every run of whitespace (spaces, tabs, newlines) is collapsed
    to a single space.

    The response is opened in a `with` block so the underlying connection is
    closed as soon as the body has been read, instead of being left for the
    garbage collector.
    """
    with urlopen(url) as response:
        text = response.read().decode()
    return re.sub(r'\s+', ' ', text)


## Review of Tables ##

In [None]:
skyscrapers = Table.read_table('skyscrapers.csv')
skyscrapers

In [None]:
skyscrapers.select('name', 'height')

In [None]:
# The result is only displayed, not assigned: skyscrapers itself keeps these
# columns (later cells still filter on 'city' and sort on 'completed').
skyscrapers.drop('completed', 'city', 'material')

In [None]:
skyscrapers

In [None]:
skyscrapers.where('city', 'Los Angeles')

In [None]:
skyscrapers.where('name', 'Empire State Building')

In [None]:
skyscrapers.where('city', 'New York City').sort('completed')

In [None]:
skyscrapers.where('city', 'New York City').sort('completed', descending=True)

In [None]:
chicago = skyscrapers.where('city', 'Chicago').drop('city')
chicago

In [None]:
chicago.where('material', 'steel').sort('completed', descending=True)

## Numbers ##

In Python, numbers can be integers or floats.  

If you start a calculation with all integers, then for most operations the result will be an integer.  The primary exception to this is division, which always produces a float.  

If there is ONE float involved in a calculation, the result will be a float.  (Unless you use a special function to convert it back to an integer after the float has been used--we'll learn about those in a bit.)


In [None]:
4 + 5 

In [None]:
20  # int

In [None]:
20 / 9  # float

In [None]:
20 / 2  # also a float

In [None]:
123456789 ** 50

In [None]:
123456789.0 ** 20

In [None]:
123456789 ** 2

In [None]:
4 / 700

In [None]:
(4 / 700000000000000000)

In [None]:
0.12345678901234567890123456789

In [None]:
0.12345678901234567890123456789 - 0.1234567890123456789

In [None]:
10 ** 0.5

In [None]:
16 ** 0.5

In [None]:
(10 ** 0.5) ** 2

In [None]:
26 / 9

In [None]:
int(20 / 10)

In [None]:
int(26/9)

In [None]:
round(2.7777)

In [None]:
float(3)

In [None]:
6 / 4

In [None]:
6 / 4000

In [None]:
6 / 400000000000000000000000000000000000000000000000000000000

In [None]:
400000000000000000000000000000000000000000000000000000000 * 1.5e-56 

In [None]:
6 / 1.5e-56 

In [None]:
x = 5

In [None]:
# Intentional error: Python has no implicit multiplication, so `2x` is a SyntaxError.
2x

In [None]:
2 * x

In [None]:
x = 3
y = "4"
z = "5.6"

int(y)

In [None]:
# Intentional error: z is the string "5.6", which int() cannot parse directly (ValueError).
int(z)

In [None]:
int(5.6)

In [None]:
int(float("5.6"))

In [None]:
round(5.6)

In [None]:
round(2.00000052345324, 3)

In [None]:
10 * 3.0

## Strings ##

Strings are essentially text, and are set off by either single or double quotes.  A string **can** contain or consist of a number.  

In [None]:
'99 bottles of rootbeer'

In [None]:
"99"

In [None]:
"baby yoda"

In [None]:
"baby yoda isn't yoda"

In [None]:
'She said "Hello"'

In [None]:
# Intentional error: the apostrophe in "isn't" ends the single-quoted string early (SyntaxError).
'baby yoda isn't yoda'

In [None]:
'straw' + 'berry' # concatenation

In [None]:
# Intentional error: + cannot combine an int with a str (TypeError).
3 + 'berry'

In [None]:
'straw' + ' '  + 'berry'

In [None]:
'ha' * 10

In [None]:
# Intentional error: a string can only be repeated an integer number of times (TypeError).
'lo' * 5.5

## Types ##

The `type` function will tell you what type of object Python considers something to be.  

In [None]:
Height = make_array("16m", "17m", "18m")
Height

In [None]:
type(Height.item(0))

In [None]:
type(Height)

In [None]:
a = 10
type(a)

In [None]:
type(4.5)

In [None]:
type('abc')

In [None]:
type(skyscrapers)

In [None]:
type(True)

In [None]:
type(abs)

In [None]:
type(np.round)

In [None]:
type(round(3.4))

In [None]:
type(np.round(3.4))

In [None]:
type('three')

In [None]:
type(3)

## Arrays ##

Who wants to guess what the command `make_array` does?

In [None]:
my_array = make_array(1, 2, 3, 4)

In [None]:
my_array

In [None]:
my_array * 2

In [None]:
my_array ** 2

In [None]:
my_array + 1

In [None]:
my_array # array is unchanged

In [None]:
another = make_array(60, 70, 80, 90)
another

In [None]:
my_array + another

In [None]:
yet_another = make_array(5, 6, 7)

In [None]:
# Intentional error: my_array has 4 elements and yet_another has 3, so elementwise addition fails.
my_array + yet_another

### Functions on Arrays ###

In [None]:
len(my_array)

In [None]:
sum(my_array)

In [None]:
sum(my_array)/len(my_array)

In [None]:
np.average(my_array)

In [None]:
another = make_array(60, 70, 80, 90)

In [None]:
len(yet_another)

In [None]:
tunas = make_array('bluefin', 'albacore', 'jim')
tunas

In [None]:
# Intentional error: sum() starts adding from the number 0, and a number cannot be added to a string.
sum(tunas)

In [None]:
make_array(1 , 22, 34, 56)

## Columns of Tables *are* Arrays ##

In [None]:
sf = skyscrapers.where('city', 'San Francisco')
sf

In [None]:
sf.select('height')

In [None]:
sf.column('height')

In [None]:
# NOTE(review): 3.28 is presumably the metres-to-feet factor — assumes 'height' is stored in metres; confirm.
feet = sf.column('height')*3.28

# The table with the extra "height (ft)" column is stored under a new name, sf_new.
sf_new = sf.with_column("height (ft)", feet)
sf_new

In [None]:
sf

In [None]:
sf.column(3)

In [None]:
np.average(sf.column('height'))

In [None]:
la = skyscrapers.where('city', 'Los Angeles')
la.show()

In [None]:
np.average(la.column('height')) - np.average(sf.column('height'))

In [None]:
np.average(la.column('height'))

In [None]:
ny = skyscrapers.where('city', "New York City")
ny

In [None]:
np.average(ny.column('height')) - np.average(la.column('height'))

In [None]:
notny = skyscrapers.where('city', are.not_equal_to("New York City"))
notny

In [None]:
# NOTE(review): are.contained_in appears to test whether each city is a substring of
# the given string, so any city name occurring inside "Los Angeles New York City"
# would be kept, not only the two intended cities — confirm against the datascience docs.
onlyLAandNYC = skyscrapers.where('city', are.contained_in("Los Angeles New York City"))
onlyLAandNYC

In [None]:
skyscrapers.where('city', are.not_contained_in("Los Angeles New York City"))

In [None]:
len(ny.column(3))

In [None]:
ny.num_rows

In [None]:
skyscrapers.where("city", "Chicago").num_rows

## Seeing some other graphs ##

For now, let's not focus on how to make these graphs, although in the future we can all come back to these notebooks to look more closely at the code that generated them.  Instead, let's focus on reading and interpreting the graphs.  

In [None]:
# Height against completion year for New York City (red) and Los Angeles (dark blue).
# Columns are chosen by label so the plot matches its axis labels regardless of
# column order, and each series is labelled so the legend identifies the colours.
plots.figure(figsize=(6, 6))
plots.scatter(ny.column('completed'),
              ny.column('height'),
              color='red',
              label='NYC')
plots.scatter(la.column('completed'),
              la.column('height'),
              color='darkblue',
              label='LA')
plots.xlabel('Year Completed')
plots.ylabel('Height')
plots.legend()
plots.title('Comparing LA to NYC');

In [None]:
# Side-by-side boxplots of skyscraper heights for the two cities.
# Both datasets go to a single boxplot call, so the axis limits and tick
# positions are set once for the pair rather than adjusted by two calls.
ticks = make_array(2.5, 3.5)
labels = make_array("NYC", "LA")

plots.figure(figsize=(6, 6))
plots.boxplot([ny.column('height'), la.column('height')], widths=.5, positions=ticks)
plots.xticks(ticks, labels)
plots.ylabel('Height')
plots.title("Boxplots Comparing NYC and LA Skyscrapers");

In [None]:
# Same side-by-side boxplots of heights, with showmeans=True so each box also
# marks the mean. Both datasets go to a single boxplot call, so the axis limits
# and tick positions are set once for the pair.
ticks = make_array(2.5, 3.5)
labels = make_array("NYC", "LA")

plots.figure(figsize=(6, 6))
plots.boxplot([ny.column('height'), la.column('height')], widths=.5, positions=ticks, showmeans=True)
plots.xticks(ticks, labels)
plots.ylabel('Height')
plots.title("Boxplots Comparing NYC and LA Skyscrapers");

In [None]:
skyscrapers.group("city").sort('count', descending=True).barh('city')

In [None]:
skyscrapers.group("city").where('count', are.above(5)).barh('city')

In [None]:
skyscrapers.group("city").where('count', are.above(5)).barh('city')
plots.title("Number of Skyscrapers")
plots.xlabel("Number")
plots.ylabel("City");

In [None]:
skyscrapers.group("city").where('count', are.above(7)).sort('count').barh('city')
plots.title("Number of Skyscrapers")
plots.xlabel("Number")
plots.ylabel("City");

In [None]:
skyscrapers.group("city").where('count', are.above(7)).sort('count', descending=True).barh('city')
plots.title("Number of Skyscrapers")
plots.xlabel("Number")
plots.ylabel("City");

In [None]:
skyscrapers.group("city").where('count', are.above(7)).sort('city').barh('city')
plots.title("Number of Skyscrapers")
plots.xlabel("Number")
plots.ylabel("City");

## Ranges ##

In [None]:
make_array(0, 1, 2, 3, 4, 5, 6)

In [None]:
np.arange(6)

In [None]:
np.arange(7)

In [None]:
np.arange(3, 9)

In [None]:
np.arange(15, 7, -1)

In [None]:
np.arange(1, 21, 2)

In [None]:
np.arange(0, 21, 2)

In [None]:
# np.arange excludes the stop value, so 1.01 is used to make sure 1.0 is included.
# NOTE(review): with a non-integer step the values (and sometimes the length) are subject
# to floating-point rounding; np.linspace(0, 1, 101) is the usual alternative.
b = np.arange(0, 1.01, 0.01)
b

In [None]:
a = np.arange(7)

In [None]:
a

In [None]:
a.item(6)

In [None]:
a.item(0)

In [None]:
b.item(49)