In [None]:
import pandas as pd

# Layout of the fixed-width file: each field name paired with the number of
# characters it occupies, in file order.
field_layout = [
    ("row", 3),
    ("id", 9),
    ("year", 4),
    ("stint", 1),
    ("team", 3),
    ("g", 2),
    ("ab", 3),
]
column_names = [field for field, _ in field_layout]
column_widths = [width for _, width in field_layout]

# The file has no header row, so the names are supplied explicitly.
df = pd.read_fwf(
    "baseball.txt",
    header=None,
    names=column_names,
    widths=column_widths,
)

# Last expression of the cell: renders the resulting frame.
df

In [None]:
import pandas
import matplotlib
#%matplotlib inline

# Parallel lists: the i-th name goes with the i-th birth count.
baby_name = ['Alice', 'Charles', 'Diane', 'Edward']
number_births = [96, 155, 66, 272]

# One (name, births) tuple per row of the frame.
dataset = [(name, births) for name, births in zip(baby_name, number_births)]
df = pandas.DataFrame(dataset, columns=['Name', 'Number'])

# Line plot of the 'Number' column against the default integer index.
df['Number'].plot.line()

In [None]:
import pylab
import random

# Fixed seed so the simulated rolls are the same on every run.
random.seed(113)

samples = 1000

# Each sample is the sum of two six-sided dice rolled one after the other.
dice = [random.randint(1, 6) + random.randint(1, 6) for _ in range(samples)]

# Bin edges at 1.5, 2.5, ..., 12.5 so that every possible total (2..12)
# falls in the middle of its own unit-wide bin.
bin_edges = pylab.arange(1.5, 12.6, 1.0)
pylab.hist(dice, bins=bin_edges)
pylab.show()

In [None]:
#
# Generating a density map using Python
#
# Prerequisites: install the basemap package with 'pip install basemap', then download
# the st99_d00 shapefile (.shp, .shx, .dbf) and states.csv into the 'files' directory
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
import pandas as pd
import numpy as np
import matplotlib

# Create the map: a Lambert conformal conic ('lcc') projection bounded by the
# lower-left / upper-right corner coordinates given below.
# NOTE(review): `map` shadows the Python builtin of the same name; the name is
# kept so that any other cell referring to it keeps working.
map = Basemap(llcrnrlon=-119, llcrnrlat=22, urcrnrlon=-64, urcrnrlat=49,
              projection='lcc', lat_1=33, lat_2=45, lon_0=-95)

# Load the shapefile under the name 'states' (this provides map.states and
# map.states_info) and draw the boundaries.
# download from https://github.com/matplotlib/basemap/tree/master/examples/st99_d00.dbf,shx,shp
map.readshapefile('files/st99_d00', name='states', drawbounds=True)

# State name of every shape in the shapefile, in the same order as map.states.
# A state made up of several polygons (e.g. mainland plus islands) is expected
# to appear once per polygon, which is why colouring iterates over every shape
# below instead of looking up only the first match.
state_names = [shape_dict['NAME'] for shape_dict in map.states_info]

ax = plt.gca()  # get current axes instance

# Load density data drawn from
# https://en.wikipedia.org/wiki/List_of_U.S._states_by_population_density
df = pd.read_csv('files/states.csv')

# The density column may contain thousands separators (e.g. "1,234"), so the
# commas are stripped before converting to float.
densities = (
    df['density/mi2']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Determine the range of density values.
max_density = float(densities.max())
min_density = float(densities.min())
print('max', max_density)
print('min', min_density)
range_density = max_density - min_density
print(range_density)

# Colours are taken from matplotlib's 'Spectral' colormap, indexed by the
# density rescaled to the interval [0, 1].
cmap = matplotlib.colormaps.get_cmap('Spectral')

# Density for each state named in the CSV.
density_by_state = dict(zip(df['State'], densities))

# A state in the CSV that has no shape cannot be drawn; fail with a clear message.
known_names = set(state_names)
unknown_states = [name for name in density_by_state if name not in known_names]
if unknown_states:
    raise ValueError(
        'states in files/states.csv not found in the shapefile: %r' % (unknown_states,)
    )

# Avoid dividing by zero when every state has the same density.
density_scale = range_density if range_density else 1.0

# Fill every polygon that belongs to a state with the colour for its density.
for state_name, seg in zip(state_names, map.states):
    if state_name not in density_by_state:
        # Shape with no density data in the CSV: leave it unfilled.
        continue
    density = density_by_state[state_name]
    color = cmap((density - min_density) / density_scale)
    poly = Polygon(seg, facecolor=color, edgecolor=color)
    ax.add_patch(poly)

plt.show()

In [None]:
#
# Plotting 3D data using Python
#

%matplotlib inline

# import tools we are using
import pandas as pd
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# Read in the car 'table'. It is whitespace-separated rather than a CSV and has
# no header row, so the column names are supplied here.
column_names = [
    'mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
    'acceleration', 'year', 'origin', 'name',
]
df = pd.read_table(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data',
    sep=r"\s+",
    index_col=0,
    header=None,
    names=column_names,
)
print(df.head())

# Start plotting: a figure with a single subplot that uses the 3D projection.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Pull out the three quantities we want. Because index_col=0 turned the first
# column ('mpg') into the index, the y values are read from the index.
xs = df['weight'].tolist()
ys = df.index.tolist()
zs = df['cylinders'].tolist()

# Set the extents of the axes from the data.
ax.set_xlim(min(xs), max(xs))
ax.set_ylim(min(ys), max(ys))
ax.set_zlim(min(zs), max(zs))

# Standard scatter diagram (except it is 3D).
ax.scatter(xs, ys, zs)

ax.set_xlabel('Weight')
ax.set_ylabel('MPG')
ax.set_zlabel('Cylinders')

plt.show()

In [None]:
import ipywidgets as widgets
from IPython.display import display

my_button = widgets.Button(description='Click My Button')

# Output area that the click handler writes into. Text printed directly from a
# widget callback is not reliably shown under the cell (some front ends send it
# to the log instead), so the message is captured in this widget.
click_output = widgets.Output()
display(my_button, click_output)


def my_button_clicked(b):
    """Report that ``my_button`` was clicked.

    Args:
        b: the button instance that triggered the event (supplied by
            ``on_click``); it is not used.
    """
    with click_output:
        print("You clicked on My Button")


# Register the handler so it runs on every click of the button.
my_button.on_click(my_button_clicked)

In [None]:
import ipywidgets as widgets

# Radio-button group offering three colour choices.
balloon_color_picker = widgets.RadioButtons(
    options=['red', 'green', 'blue'],
    description='Balloon color:',
    disabled=False,
)

# Last expression of the cell: renders the widget.
balloon_color_picker

In [None]:
from ipywidgets import *
from IPython.display import display

# Three independent controls: a float slider and two text boxes.
slider = widgets.FloatSlider()
message = widgets.Text(value='Hello World')
message2 = widgets.Text(value='Hello World 2')

# Group the controls in a single Box and outline it with a thin gray border.
boxed_controls = [slider, message, message2]
container = widgets.Box(children=boxed_controls)
container.layout.border = '1px gray solid'

display(container)

In [None]:
import pyspark

# Reuse the active SparkContext if there is one, otherwise start a new one.
# Checking only for a notebook global named `sc` is not enough: constructing a
# second SparkContext while one is already active raises an error.
sc = pyspark.SparkContext.getOrCreate()

# Word count: split every line on single spaces, emit (word, 1) pairs and sum
# the ones per word.
text_file = sc.textFile("hello_r.ipynb")
counts = (
    text_file.flatMap(lambda line: line.split(" "))
    .map(lambda word: (word, 1))
    .reduceByKey(lambda a, b: a + b)
)

# collect() brings all (word, count) pairs back to the driver for printing.
for x in counts.collect():
    print(x)

In [None]:
import pyspark

# Reuse the active SparkContext if there is one, otherwise start a new one.
# Checking only for a notebook global named `sc` is not enough: constructing a
# second SparkContext while one is already active raises an error.
sc = pyspark.SparkContext.getOrCreate()

# Word count as (word, count) pairs, then ordered by the word (the pair's key).
text_file = sc.textFile("hello_r.ipynb")
sorted_counts = (
    text_file.flatMap(lambda line: line.split(" "))
    .map(lambda word: (word, 1))
    .reduceByKey(lambda a, b: a + b)
    .sortByKey()
)

# collect() brings all (word, count) pairs back to the driver for printing.
for x in sorted_counts.collect():
    print(x)