Skip to content

Commit

Permalink
updated to be compatible with anaconda 4.0 and python 3.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Kent Danielsson committed May 17, 2016
1 parent 7b05530 commit d0424d5
Show file tree
Hide file tree
Showing 16 changed files with 633 additions and 10,835 deletions.
1,158 changes: 61 additions & 1,097 deletions machine_learning/ml02v04_the_scikit-learn_interface.ipynb

Large diffs are not rendered by default.

792 changes: 33 additions & 759 deletions machine_learning/ml03v04_visualizing_the_data.ipynb

Large diffs are not rendered by default.

2,212 changes: 58 additions & 2,154 deletions machine_learning/ml04v04_dealing_with_bias_and_variance.ipynb

Large diffs are not rendered by default.

580 changes: 18 additions & 562 deletions machine_learning/ml13v04_support_vector_machines.ipynb

Large diffs are not rendered by default.

3,349 changes: 107 additions & 3,242 deletions pandas/pd01v04_basic_data_operativity.ipynb

Large diffs are not rendered by default.

1,077 changes: 39 additions & 1,038 deletions pandas/pd04v04_statistical_tools.ipynb

Large diffs are not rendered by default.

1,304 changes: 33 additions & 1,271 deletions pandas/pd06v04_advanced_data_management.ipynb

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pandas/python2-deprecated/utilities/__init__.py
@@ -0,0 +1,3 @@
from tom import TomTom # (TomTom, FileGenerator)
from my_finance_tools import (side_by_side, csv_preview)
from my_finance_plots import montecarloPlot
160 changes: 160 additions & 0 deletions pandas/python2-deprecated/utilities/generators.py
@@ -0,0 +1,160 @@
import pandas as pd
from pandas_datareader.data import DataReader
import numpy as np

def p01_d2csv(tomtom):
    """Write a small five-row demo frame to ``p01_d2.csv``.

    The frame has two string columns (``a`` and ``b``) and a column ``c``
    filled by ``np.random.randn``.  The output path is resolved through
    ``tomtom.get_tmp_name``.
    """
    columns = {
        'a': pd.Series(['one', 'one', 'two', 'three', 'two']),
        'b': pd.Series(['x', 'y', 'y', 'x', 'y']),
        'c': pd.Series(np.random.randn(5)),
    }
    frame = pd.DataFrame(columns)
    frame.to_csv(tomtom.get_tmp_name('p01_d2.csv'))

def p01_d3csv(tomtom):
    """Re-export ``tabella_comuni_italiani.txt`` as ``p01_d3.csv``.

    The example file is read as semicolon-separated text whose first row is
    the header, then written back with pandas' default (comma) separator and
    without the index column.
    """
    source = tomtom.get_example_name('tabella_comuni_italiani.txt')
    table = pd.read_csv(source, sep=';', header=0)
    table.to_csv(tomtom.get_tmp_name('p01_d3.csv'), index=False)

def p01_d4csv(tomtom):
    """Write an 8x3 frame with a three-level row index to ``p01_d4.csv``.

    The index levels are named ``Country``, ``Number`` and ``Dir``; the cell
    values come from ``np.random.randn``.
    """
    level_names = ['Country', 'Number', 'Dir']
    keys = [
        ('Fra', 'one', 'x'),
        ('Fra', 'two', 'y'),
        ('Fra', 'two', 'z'),
        ('Ger', 'one', 'x'),
        ('Jap', 'one', 'x'),
        ('Jap', 'two', 'x'),
        ('USA', 'one', 'y'),
        ('USA', 'one', 'z'),
    ]
    row_index = pd.MultiIndex.from_tuples(keys, names=level_names)
    # One row of three random values per index key (8 keys).
    frame = pd.DataFrame(np.random.randn(len(keys), 3), index=row_index)
    frame.to_csv(tomtom.get_tmp_name('p01_d4.csv'))

def p01_prices(tomtom):
    """Save a short window of closing prices to ``p01_prices.txt``.

    The ``Close`` column is fetched for AAPL, JNJ, XOM and GOOG through
    ``DataReader(symbol, 'yahoo')``; the series are combined into one frame
    (one column per symbol) and the rows at positions ``[-7:-1]`` -- the six
    rows preceding the last one -- are written as CSV.
    """
    symbols = ['AAPL', 'JNJ', 'XOM', 'GOOG']
    closes = {sym: DataReader(sym, 'yahoo')['Close'] for sym in symbols}
    prices = pd.DataFrame.from_dict(closes)
    # .iloc states the positional intent explicitly.  The former .ix[-7:-1]
    # was positional as well (integer slice on a non-integer index; assumes
    # DataReader returns date-indexed data -- TODO confirm), but .ix is
    # deprecated since pandas 0.20 and removed in pandas 1.0.
    prices.iloc[-7:-1].to_csv(tomtom.get_tmp_name('p01_prices.txt'))

def p01_volumes(tomtom):
    """Save a short window of traded volumes to ``p01_volumes.txt``.

    The ``Volume`` column is fetched for AAPL, JNJ and XOM through
    ``DataReader(symbol, 'yahoo')``; the series are combined into one frame
    (one column per symbol) and the rows at positions ``[-7:-3]`` -- four
    rows -- are written as CSV.
    """
    symbols = ['AAPL', 'JNJ', 'XOM']
    volumes = {sym: DataReader(sym, 'yahoo')['Volume'] for sym in symbols}
    frame = pd.DataFrame.from_dict(volumes)
    # .iloc states the positional intent explicitly.  The former .ix[-7:-3]
    # was positional as well (integer slice on a non-integer index; assumes
    # DataReader returns date-indexed data -- TODO confirm), but .ix is
    # deprecated since pandas 0.20 and removed in pandas 1.0.
    frame.iloc[-7:-3].to_csv(tomtom.get_tmp_name('p01_volumes.txt'))

def p03_DAX(tomtom):
    """Fetch ``^GDAXI`` quotes starting 01/01/2000 and save them as ``p03_DAX.csv``.

    The data comes from ``DataReader`` with the ``'yahoo'`` source.
    """
    quotes = DataReader('^GDAXI', 'yahoo', start='01/01/2000')
    destination = tomtom.get_tmp_name('p03_DAX.csv')
    quotes.to_csv(destination)

def p03_AAPL(tomtom):
    """Fetch ``AAPL`` quotes starting 01/01/2000 and save them as ``p03_AAPL.csv``.

    The data comes from ``DataReader`` with the ``'yahoo'`` source.
    """
    quotes = DataReader('AAPL', 'yahoo', start='01/01/2000')
    destination = tomtom.get_tmp_name('p03_AAPL.csv')
    quotes.to_csv(destination)

def p06_d3csv(tomtom):
    """Write a five-row City/State/Name/Revenues table to ``p06_d2.txt``.

    Despite the ``d3`` in the function name, the file written is
    ``p06_d2.txt``.  The ``City`` and ``State`` strings carry stray blanks
    and mixed case, and ``Revenues`` holds numbers stored as strings.
    """
    records = {
        'City': ['New York', ' frisco', 'houston', ' taft', 'venice'],
        'State': [' NY ', 'CA', ' tx ', ' OK', ' IL'],
        'Name': ['Roy', 'Johnn', 'Jim', 'Paul', 'Ross'],
        'Revenues': ['1250', '840', '349', '1100', '900'],
    }
    frame = pd.DataFrame(records)
    frame.to_csv(tomtom.get_tmp_name('p06_d2.txt'))

def p06_d2csv(tomtom):
    """Write a 24-row Quantity/Axis/Type/N1/N2 table to ``p06_d3.txt``.

    Despite the ``d2`` in the function name, the file written is
    ``p06_d3.txt``.  The three label columns are short patterns repeated to
    24 entries; ``N1`` and ``N2`` are filled by ``np.random.randn``.
    """
    quantity = ['1-one', '1-one', '2-two', '3-three'] * 6
    axis = ['X', 'Y', 'Z'] * 8
    kind = ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4
    frame = pd.DataFrame({
        'Quantity': quantity,
        'Axis': axis,
        'Type': kind,
        'N1': np.random.randn(24),
        'N2': np.random.randn(24),
    })
    frame.to_csv(tomtom.get_tmp_name('p06_d3.txt'))


def p07_d1csv(tomtom):
    """Write a nine-row State/City/Views/Likes table to ``p07_d1.txt``.

    All values are fixed literals; several State and City values repeat.
    """
    states = ['NE', 'KY', 'CO', 'CO', 'KY', 'KY', 'CO', 'NE', 'CO']
    cities = ['Page', 'Stone', 'Rye', 'Rye', 'Dema', 'Keavy', 'Rye',
              'Cairo', 'Dumont']
    views = [10, 9, 3, 7, 4, 2, 1, 8, 12]
    likes = [4, 3, 0, 2, 1, 1, 0, 3, 7]
    frame = pd.DataFrame({
        'State': states,
        'City': cities,
        'Views': views,
        'Likes': likes,
    })
    frame.to_csv(tomtom.get_tmp_name('p07_d1.txt'))

def p07_d2csv(tomtom):
    """Write a random ticker table with an industry label to ``p07_d2.csv``.

    1000 five-letter upper-case tickers are drawn with the ``random`` module
    seeded to 0.  500 of them, picked through a NumPy permutation, index
    three columns of random values (``Momentum``, ``Value``,
    ``ShortInterest``); an ``Industry`` column (``FINANCIAL`` or ``TECH``)
    is then attached by ticker.  NumPy's generator is not seeded here.
    """
    import random
    import string

    random.seed(0)
    ticker_count = 1000
    letters = string.ascii_uppercase

    def make_ticker(length):
        # One random upper-case string of the requested length.
        return ''.join(random.choice(letters) for _ in xrange(length))

    tickers = np.array([make_ticker(5) for _ in xrange(ticker_count)])

    # Three hypothetical, random portfolios for a 500-ticker subset.
    momentum = np.random.randn(500) / 200 + 0.03
    value = np.random.randn(500) / 200 + 0.08
    short_interest = np.random.randn(500) / 200 - 0.02
    chosen = tickers.take(np.random.permutation(ticker_count)[:500])
    table = pd.DataFrame({'Momentum': momentum,
                          'Value': value,
                          'ShortInterest': short_interest},
                         index=chosen)

    # Random industry classification for every ticker; the assignment keeps
    # only the entries whose ticker is in the table's index.
    labels = np.array(['FINANCIAL', 'TECH'])
    picks = np.random.randint(0, len(labels), ticker_count)
    table['Industry'] = pd.Series(labels.take(picks), index=tickers,
                                  name='industry')

    table.to_csv(tomtom.get_tmp_name('p07_d2.csv'))

def p07_portfolioh5(tomtom):
    """Store a random portfolio and its factors in ``p07_portfolio.h5``.

    1000 five-letter upper-case tickers are drawn with the ``random`` module
    seeded to 0 and shuffled through a NumPy permutation.  Three factor
    columns come from ``np.random.rand``; the portfolio is a weighted sum of
    the factors plus noise.  Both objects are written to an HDF5 store under
    the keys ``factors`` and ``portfolio``.  NumPy's generator is not seeded
    here.
    """
    import random; random.seed(0)
    import string
    N = 1000

    def rands(n):
        # One random upper-case string of length n.
        choices = string.ascii_uppercase
        return ''.join([random.choice(choices) for _ in xrange(n)])

    tickers = np.array([rands(5) for _ in xrange(N)])
    fac1, fac2, fac3 = np.random.rand(3, N)
    # Taking the first N entries of an N-permutation uses every ticker once,
    # in shuffled order.
    ticker_subset = tickers.take(np.random.permutation(N)[:N])

    # portfolio = weighted sum of factors plus noise
    portfolio = pd.Series(0.7 * fac1 - 1.2 * fac2 + 0.3 * fac3 + np.random.rand(N),
                          index=ticker_subset)
    factors = pd.DataFrame({'f1': fac1, 'f2': fac2, 'f3': fac3},
                           index=ticker_subset)

    h5file = pd.HDFStore(tomtom.get_tmp_name('p07_portfolio.h5'))
    try:
        h5file['factors'] = factors
        h5file['portfolio'] = portfolio
    finally:
        # Release the file handle even when one of the writes fails.
        h5file.close()

def baby_names(tomtom):
    """Unpack the ``babynames.zip`` example archive into the temp directory.

    The archive path comes from ``tomtom.get_example_name`` and the target
    directory from ``tomtom.get_tmp_name("")``.
    """
    import zipfile
    path = tomtom.get_example_name('babynames.zip')
    opath = tomtom.get_tmp_name("")
    # The with-block closes the archive even if extraction fails; the
    # original left the handle open.
    with zipfile.ZipFile(path, "r") as archive:
        archive.extractall(path=opath)

# Maps each output name (relative to the temp directory) to the function that
# creates it.  generate_all() tests the key for existence before calling the
# function, so every key must name the file its function really writes.
generators = {
    'baby_names/': baby_names,
    'p07_portfolio.h5': p07_portfolioh5,
    'p07_d2.csv': p07_d2csv,
    'p07_d1.txt': p07_d1csv,
    # The two p06 functions are named the opposite way round from the files
    # they write: p06_d2csv() writes 'p06_d3.txt' and p06_d3csv() writes
    # 'p06_d2.txt'.  Pairing by function name made the existence check look
    # at the wrong file, so 'p06_d3.txt' could be skipped and never created.
    'p06_d3.txt': p06_d2csv,
    'p06_d2.txt': p06_d3csv,
    'p03_DAX.csv': p03_DAX,
    'p03_AAPL.csv': p03_AAPL,
    'p01_prices.txt': p01_prices,
    'p01_d2.csv': p01_d2csv,
    'p01_d3.csv': p01_d3csv,
    'p01_d4.csv': p01_d4csv,
    'p01_volumes.txt': p01_volumes,
}

def generate_all():
    """Create every registered data file that does not exist yet.

    For each ``(filename, generator)`` pair in the module-level
    ``generators`` mapping, the path is resolved with
    ``TomTom.get_tmp_name``; the generator is called only when nothing
    exists at that path.  One status line is printed per entry.
    """
    # Function-scope imports, as in the original (presumably to avoid a
    # circular import with tom -- TODO confirm).
    from tom import TomTom
    import os

    locator = TomTom()
    for filename, gen in generators.iteritems():
        if os.path.exists(locator.get_tmp_name(filename)):
            print("Skipped {} (already existing)".format(filename))
            continue
        print("Generating {}...".format(filename))
        gen(locator)

if __name__ == '__main__':
    generate_all()
44 changes: 44 additions & 0 deletions pandas/python2-deprecated/utilities/my_finance_tools.py
@@ -0,0 +1,44 @@
from datetime import datetime
import matplotlib.finance as fin
import pandas as pd
from pandas import Index, DataFrame
from pandas.core.datetools import BMonthEnd

# MY FINANCE TOOLS -------------------------------------------------------------
''' Created by: Addfor s.r.l.
This module provides a few example finance and utility functions.
'''

def csv_preview(filename, lines_to_print=5):
    '''
    Print the first lines of a text file to standard output.

    filename       -- path of the file to preview
    lines_to_print -- maximum number of lines to echo (default 5)

    Lines are echoed verbatim: each keeps its own line ending and nothing is
    appended.  A file shorter than ``lines_to_print`` is printed in full.

    TODO - Add a control to define how many columns to print:
    start_column = 0
    end_column = 79
    '''
    # Function-scope import keeps this block self-contained.
    import sys

    with open(filename) as fid:
        for _ in range(lines_to_print):
            line = fid.readline()
            if not line:
                # readline() returns '' only at end of file; stop early
                # instead of echoing empty strings.
                break
            # Same output as the Python 2 statement `print line,` (no extra
            # newline), but also valid syntax on Python 3.
            sys.stdout.write(line)

def side_by_side(*objs, **kwds):
    '''
    Print the repr of several objects next to each other.

    Each positional argument is rendered with ``repr`` and split into lines;
    the resulting columns are joined with pandas' ``adjoin`` helper and
    printed between two rules of 40 dashes.

    Keyword arguments:
    space -- number of blanks between adjacent columns (default 4)
    '''
    space = kwds.get('space', 4)
    columns = [repr(obj).split('\n') for obj in objs]

    # adjoin is a pandas-internal helper.  Use the location the original
    # code relied on when it exists; newer pandas releases only provide it
    # in pandas.io.formats.printing.
    adjoin = getattr(pd.core.common, 'adjoin', None)
    if adjoin is None:
        from pandas.io.formats.printing import adjoin

    rule = '-' * 40
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(rule)
    print(adjoin(space, *columns))
    print(rule)

def getQuotes(symbol, start, end):
    '''
    Return historical quotes for ``symbol`` as a date-indexed DataFrame.

    The rows returned by ``fin.quotes_historical_yahoo(symbol, start, end)``
    are unpacked positionally as (date, open, close, high, low, volume).
    Each date is truncated to an integer and converted with
    ``datetime.fromordinal``; the remaining fields become the columns
    ``open``, ``close``, ``high``, ``low`` and ``volume``.
    '''
    rows = fin.quotes_historical_yahoo(symbol, start, end)
    # Transpose the list of rows into one tuple per field.
    ordinals, opens, closes, highs, lows, volumes = zip(*rows)

    columns = {
        'open': opens,
        'close': closes,
        'high': highs,
        'low': lows,
        'volume': volumes,
    }
    date_index = Index([datetime.fromordinal(int(day)) for day in ordinals])
    return DataFrame(columns, index=date_index)


def toMonthly(frame, how):
    '''
    Aggregate ``frame`` into one row per ``BMonthEnd`` date.

    Every index label is mapped through ``BMonthEnd().rollforward``; rows
    that map to the same date are combined with ``aggregate(how)``.
    '''
    month_end = BMonthEnd()
    grouped = frame.groupby(month_end.rollforward)
    return grouped.aggregate(how)
6 changes: 3 additions & 3 deletions pandas/utilities/__init__.py
@@ -1,3 +1,3 @@
from tom import TomTom # (TomTom, FileGenerator)
from my_finance_tools import (side_by_side, csv_preview)
from my_finance_plots import montecarloPlot
from .tom import TomTom # (TomTom, FileGenerator)
from .my_finance_tools import (side_by_side, csv_preview)
from .my_finance_plots import montecarloPlot
16 changes: 8 additions & 8 deletions pandas/utilities/generators.py
@@ -1,6 +1,7 @@
import pandas as pd
from pandas_datareader.data import DataReader
import numpy as np
from .tom import TomTom

def p01_d2csv(tomtom):
d = {'a' : pd.Series(['one','one','two','three','two']),
Expand Down Expand Up @@ -78,9 +79,9 @@ def p07_d2csv(tomtom):
N = 1000
def rands(n):
choices = string.ascii_uppercase
return ''.join([random.choice(choices) for _ in xrange(n)])
return ''.join([random.choice(choices) for _ in range(n)])

tickers = np.array([rands(5) for _ in xrange(N)])
tickers = np.array([rands(5) for _ in range(N)])

# Create a DataFrame containing 3 columns representing
# hypothetical, but random portfolios for a subset of tickers:
Expand All @@ -103,9 +104,9 @@ def p07_portfolioh5(tomtom):
N = 1000
def rands(n):
choices = string.ascii_uppercase
return ''.join([random.choice(choices) for _ in xrange(n)])
return ''.join([random.choice(choices) for _ in range(n)])

tickers = np.array([rands(5) for _ in xrange(N)])
tickers = np.array([rands(5) for _ in range(N)])
fac1, fac2, fac3 = np.random.rand(3, 1000)
ticker_subset = tickers.take(np.random.permutation(N)[:1000])

Expand Down Expand Up @@ -144,17 +145,16 @@ def baby_names(tomtom):
}

def generate_all():
from tom import TomTom
import os

tomtom = TomTom()
for filename, gen in generators.iteritems():
for filename, gen in generators.items():
path = tomtom.get_tmp_name(filename)
if not os.path.exists(path):
print "Generating {}...".format(filename)
print("Generating {}...".format(filename))
gen(tomtom)
else:
print "Skipped {} (already existing)".format(filename)
print("Skipped {} (already existing)".format(filename))

if __name__ == '__main__':
generate_all()
10 changes: 5 additions & 5 deletions pandas/utilities/my_finance_tools.py
Expand Up @@ -18,14 +18,14 @@ def csv_preview(filename, lines_to_print=5):
with open(filename) as fid:
for _ in range(lines_to_print):
line = fid.readline()
print line,
print(line,)

def side_by_side(*objs, **kwds):
space = kwds.get('space', 4)
reprs = [repr(obj).split('\n') for obj in objs]
print '-'*40
print pd.core.common.adjoin(space, *reprs)
print '-'*40
print('-'*40)
print(pd.core.common.adjoin(space, *reprs))
print('-'*40)

def getQuotes(symbol, start, end):
'''getQuotes documentation'''
Expand All @@ -41,4 +41,4 @@ def getQuotes(symbol, start, end):
def toMonthly(frame, how):
'''toMonthly documentation'''
offset = BMonthEnd()
return frame.groupby(offset.rollforward).aggregate(how)
return frame.groupby(offset.rollforward).aggregate(how)
2 changes: 1 addition & 1 deletion pandas/utilities/tom.py
Expand Up @@ -3,7 +3,7 @@
TEMP_DIR="temp"
EXAMPLE_DIR="example_data"

from generators import generators
#from generators import generators

class TomTom(object):
"""
Expand Down

0 comments on commit d0424d5

Please sign in to comment.