In [1]:
import pandas as pd
import numpy as np

# Stock Data processing

## Part 1: Read Raw data

In [2]:
df = pd.read_csv('/Users/heyifan/Desktop/DBC/2019春季冲刺班/V1-Data Science VIP项目课件-完整版/V1-8节4个Data Project实操项目/项⽬4：推荐系统应用financial_product_recommendation/financial_product_recommendation/data/all_stocks_5yr.csv')
df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


In [3]:
print(f'Dataframe shape: {df.shape}')

Dataframe shape: (619040, 7)


## Part 2: Generate features from time series data
* Build function for one stock
* Group by whole dataset by their name
* Loop every stock

Take AAL as example

In [4]:
AAL = df[df.Name=='AAL']
print(f'Dataframe shape: {AAL.shape}')
AAL.head(10)

Dataframe shape: (1259, 7)


Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL
5,2013-02-15,13.93,14.61,13.93,14.5,15628000,AAL
6,2013-02-19,14.33,14.56,14.08,14.26,11354400,AAL
7,2013-02-20,14.17,14.26,13.15,13.33,14725200,AAL
8,2013-02-21,13.62,13.95,12.9,13.37,11922100,AAL
9,2013-02-22,13.57,13.6,13.21,13.57,6071400,AAL


### 2.1 Generate monthly return
* Too much data volatility in days

In [5]:
AAL['date'].values

array(['2013-02-08', '2013-02-11', '2013-02-12', ..., '2018-02-05',
       '2018-02-06', '2018-02-07'], dtype=object)

* Index is string type, so we can get monthly return by slice[0:7]
* We can generate an index only contains about month

In [6]:
AAL.index = list(map(lambda x:x[0:7], AAL.date)) # generate month column to group by and make this column become index

In [7]:
AAL.head()

Unnamed: 0,date,open,high,low,close,volume,Name
2013-02,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
2013-02,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2013-02,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
2013-02,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
2013-02,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


* Keep only the first data per month

In [8]:
AAL_month = AAL.groupby([AAL.index]).nth(0).reset_index() 
# nth means get first row of the grouped data
#reset_index means Turn the original index into column and regenerate an index at the same time
AAL_month.head(10)

Unnamed: 0,index,date,open,high,low,close,volume,Name
0,2013-02,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-03,2013-03-01,13.37,13.95,13.32,13.61,7376800,AAL
2,2013-04,2013-04-01,17.02,17.13,16.54,16.67,5222300,AAL
3,2013-05,2013-05-01,16.91,17.17,16.6,16.6,4943600,AAL
4,2013-06,2013-06-03,17.54,17.9,17.4,17.73,5776800,AAL
5,2013-07,2013-07-01,16.5,17.04,16.48,16.8,4666900,AAL
6,2013-08,2013-08-01,19.44,19.59,19.24,19.38,7989100,AAL
7,2013-09,2013-09-03,16.38,16.64,16.11,16.39,4178200,AAL
8,2013-10,2013-10-01,18.98,19.85,18.98,19.69,7630300,AAL
9,2013-11,2013-11-01,22.04,22.47,21.6,22.44,7190700,AAL


In [9]:
AAL_month['index'].head()

0    2013-02
1    2013-03
2    2013-04
3    2013-05
4    2013-06
Name: index, dtype: object

* Generate monthly return

#### WHAT IS RETURN | WHY IT IS IMPORTANT
- In the stock market, people only care about price, volume and rate of change
- Volatility of the RETURN RATE
    - We divide the next month's closing price by the previous month's closing price and subtract 1 
- Volatility of volume
    - Almost the same

How to get the next day's data: shift(1) 
* means get the data one row below

In [10]:
AAL_month.open.shift(1).head()

0      NaN
1    15.07
2    13.37
3    17.02
4    16.91
Name: open, dtype: float64

In [11]:
AAL_month.open.shift(3).head()

0      NaN
1      NaN
2      NaN
3    15.07
4    13.37
Name: open, dtype: float64

In [12]:
AAL_month['return'] = AAL_month.close.shift(1)/AAL_month.close -1 
AAL_month.head()

Unnamed: 0,index,date,open,high,low,close,volume,Name,return
0,2013-02,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL,
1,2013-03,2013-03-01,13.37,13.95,13.32,13.61,7376800,AAL,0.083762
2,2013-04,2013-04-01,17.02,17.13,16.54,16.67,5222300,AAL,-0.183563
3,2013-05,2013-05-01,16.91,17.17,16.6,16.6,4943600,AAL,0.004217
4,2013-06,2013-06-03,17.54,17.9,17.4,17.73,5776800,AAL,-0.063734


In [13]:
AAL_month['vol_change'] = AAL_month.volume.shift(1)/AAL_month.volume -1
AAL_month.head()

Unnamed: 0,index,date,open,high,low,close,volume,Name,return,vol_change
0,2013-02,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL,,
1,2013-03,2013-03-01,13.37,13.95,13.32,13.61,7376800,AAL,0.083762,0.139722
2,2013-04,2013-04-01,17.02,17.13,16.54,16.67,5222300,AAL,-0.183563,0.412558
3,2013-05,2013-05-01,16.91,17.17,16.6,16.6,4943600,AAL,0.004217,0.056376
4,2013-06,2013-06-03,17.54,17.9,17.4,17.73,5776800,AAL,-0.063734,-0.144232


### 2.2 Modulize engineering process

In [14]:
def generate_features(df):
    df_monthly = df.groupby([df.index]).nth(0).reset_index()
    df_monthly['return'] = df_monthly.close.shift(1)/df_monthly.close -1
    df_monthly['vol_change'] = df_monthly.volume.shift(1)/df_monthly.volume -1
    return df_monthly[1:]

In [15]:
test_AAL = generate_features(AAL)
test_AAL.head()

Unnamed: 0,index,date,open,high,low,close,volume,Name,return,vol_change
1,2013-03,2013-03-01,13.37,13.95,13.32,13.61,7376800,AAL,0.083762,0.139722
2,2013-04,2013-04-01,17.02,17.13,16.54,16.67,5222300,AAL,-0.183563,0.412558
3,2013-05,2013-05-01,16.91,17.17,16.6,16.6,4943600,AAL,0.004217,0.056376
4,2013-06,2013-06-03,17.54,17.9,17.4,17.73,5776800,AAL,-0.063734,-0.144232
5,2013-07,2013-07-01,16.5,17.04,16.48,16.8,4666900,AAL,0.055357,0.237824


### 2.3 Generate features for all stocks

In [16]:
df.index = list(map(lambda x:x[0:7], df.date)) #generate a month column to groupby 

In [17]:
df_featured = df.groupby(['Name']).apply(generate_features) # apply function to every stock 

In [18]:
df_featured = df_featured.reset_index(drop = True).set_index('Name')

In [19]:
df_featured.to_csv('jupyter notebook/df_featured.csv', index = True)

## Part 3: Package

In [20]:
class processor:
    def generate_monthly_features(self, df): # process whole dataset
        df.index = list(map(lambda x:x[0:7], df.date))
        df_featured = df.groupby(['Name']).apply(self, helper_monthly_features)
        return df_featured
    
    def helper_monthly_features(self, df): # only process one stock 
        df_monthly = df.groupby([df.index]).nth(0).reset_index()
        df_monthly['return'] = df_monthly.close.shift(1)/df_monthly.close -1
        df_monthly['vol_change'] = df_monthly.volume.shift(1)/df_monthly.volume -1
        return df_monthly[1:] 

# Custom data processing
* People's volumn data represents their attitude to that stock

## Part 1: Read stock data

In [21]:
df_featured.head()

Unnamed: 0_level_0,index,date,open,high,low,close,volume,return,vol_change
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A,2013-03,2013-03-01,41.18,41.98,40.73,41.93,3089323,0.075125,-0.409335
A,2013-04,2013-04-01,41.83,41.9771,40.79,40.93,2541331,0.024432,0.215632
A,2013-05,2013-05-01,41.42,41.74,41.26,41.31,2726213,-0.009199,-0.067816
A,2013-06,2013-06-03,45.65,45.84,45.04,45.51,3677473,-0.092287,-0.258672
A,2013-07,2013-07-01,43.05,43.77,42.91,43.59,4283821,0.044047,-0.141544


## Part 2: Generate fake customer data

### 2.1 Generate customer position data

In [22]:
import random

#### 2.1.1 Generate fake data number of stocks that customer is holding

In [23]:
### random generate number of stocks that customer is holding
# 生成每个用户的持仓数
num_holding = random.sample(list(range(3,10)),1)
num_holding

[5]

random.sample function: https://www.cnblogs.com/yd1227/archive/2011/03/18/1988015.html

In [24]:
list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  
slice = random.sample(list, 5)  #从list中随机获取5个元素，作为一个片断返回  
print (slice)

[6, 2, 9, 7, 4]


In [25]:
### make it into function
def sample(target, num):
    return random.sample(target, num) # 从target中随机取出num个数据 

In [33]:
### create fake data for 1000 customers 
# 生成1000个数据（为每个用户随机产生持仓数据）
num_customer = 1000 # 模拟1000个用户 
num_stocks = [] # 每个人持有的股票数目未知 

for i in range(num_customer):
#  num_stocks += sample(list(range(3, 10)), 1) # 给每个用户匹配一个自己的持仓数目 
    num_stocks += sample(range(3, 10), 1) # 给每个用户匹配一个自己的持仓数目 



In [34]:
num_stocks[:10]

[5, 9, 8, 5, 3, 8, 4, 8, 6, 4]

#### 2.1.2 Generate fake data: stocks that customers is holding

In [35]:
### Extract stock list 
# 取出所有股票的名称信息
stocksList = df_featured.index.unique().values

In [36]:
stocksList.shape

(505,)

In [47]:
stocksList

array(['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACN', 'ADBE',
       'ADI', 'ADM', 'ADP', 'ADS', 'ADSK', 'AEE', 'AEP', 'AES', 'AET',
       'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN',
       'ALK', 'ALL', 'ALLE', 'ALXN', 'AMAT', 'AMD', 'AME', 'AMG', 'AMGN',
       'AMP', 'AMT', 'AMZN', 'ANDV', 'ANSS', 'ANTM', 'AON', 'AOS', 'APA',
       'APC', 'APD', 'APH', 'APTV', 'ARE', 'ARNC', 'ATVI', 'AVB', 'AVGO',
       'AVY', 'AWK', 'AXP', 'AYI', 'AZO', 'BA', 'BAC', 'BAX', 'BBT',
       'BBY', 'BDX', 'BEN', 'BF.B', 'BHF', 'BHGE', 'BIIB', 'BK', 'BLK',
       'BLL', 'BMY', 'BRK.B', 'BSX', 'BWA', 'BXP', 'C', 'CA', 'CAG',
       'CAH', 'CAT', 'CB', 'CBG', 'CBOE', 'CBS', 'CCI', 'CCL', 'CDNS',
       'CELG', 'CERN', 'CF', 'CFG', 'CHD', 'CHK', 'CHRW', 'CHTR', 'CI',
       'CINF', 'CL', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS',
       'CNC', 'CNP', 'COF', 'COG', 'COL', 'COO', 'COP', 'COST', 'COTY',
       'CPB', 'CRM', 'CSCO', 'CSRA', 'CSX', 'CTAS', 'CTL', 'CTSH', 

In [51]:
# 将stocksList打散成为string格式 
stocksList = [ str(i) for i in stocksList ]
stocksList

['A',
 'AAL',
 'AAP',
 'AAPL',
 'ABBV',
 'ABC',
 'ABT',
 'ACN',
 'ADBE',
 'ADI',
 'ADM',
 'ADP',
 'ADS',
 'ADSK',
 'AEE',
 'AEP',
 'AES',
 'AET',
 'AFL',
 'AGN',
 'AIG',
 'AIV',
 'AIZ',
 'AJG',
 'AKAM',
 'ALB',
 'ALGN',
 'ALK',
 'ALL',
 'ALLE',
 'ALXN',
 'AMAT',
 'AMD',
 'AME',
 'AMG',
 'AMGN',
 'AMP',
 'AMT',
 'AMZN',
 'ANDV',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'APC',
 'APD',
 'APH',
 'APTV',
 'ARE',
 'ARNC',
 'ATVI',
 'AVB',
 'AVGO',
 'AVY',
 'AWK',
 'AXP',
 'AYI',
 'AZO',
 'BA',
 'BAC',
 'BAX',
 'BBT',
 'BBY',
 'BDX',
 'BEN',
 'BF.B',
 'BHF',
 'BHGE',
 'BIIB',
 'BK',
 'BLK',
 'BLL',
 'BMY',
 'BRK.B',
 'BSX',
 'BWA',
 'BXP',
 'C',
 'CA',
 'CAG',
 'CAH',
 'CAT',
 'CB',
 'CBG',
 'CBOE',
 'CBS',
 'CCI',
 'CCL',
 'CDNS',
 'CELG',
 'CERN',
 'CF',
 'CFG',
 'CHD',
 'CHK',
 'CHRW',
 'CHTR',
 'CI',
 'CINF',
 'CL',
 'CLX',
 'CMA',
 'CMCSA',
 'CME',
 'CMG',
 'CMI',
 'CMS',
 'CNC',
 'CNP',
 'COF',
 'COG',
 'COL',
 'COO',
 'COP',
 'COST',
 'COTY',
 'CPB',
 'CRM',
 'CSCO',
 'CSRA',
 'CSX',

In [52]:
### sample stock
stocks = []
# 两个input，一个是customer的信息，一个是他们持有股票的信息；根据他们持有股票的数量sample出股票名字 
for customer, num_stock in zip(range(num_customer), num_stocks): # 循环每组数据（把每个用户和他的持仓数匹配在一起）：用户1，8；用户2，4；
    sampled_stocks = sample(stocksList, num_stock) # 从股票列表中，抽取出num_stock支股票 给到每组循环（相当于每个人的持仓的股票）
    stocks.append(sampled_stocks)

zip function: https://blog.csdn.net/eric_sunah/article/details/20551087

In [53]:
stocks

[['TSCO', 'MAR', 'DVN', 'CBS', 'CMA'],
 ['HBAN', 'GLW', 'ADS', 'XRAY', 'ADM', 'LB', 'HSY', 'RE', 'JNPR'],
 ['MAA', 'CSX', 'AAP', 'ANSS', 'HAS', 'DISH', 'CB', 'NAVI'],
 ['MCO', 'DRE', 'BAX', 'ROK', 'CHD'],
 ['NTRS', 'SLG', 'RCL'],
 ['HSIC', 'CPB', 'VTR', 'AIG', 'ETFC', 'MCO', 'DISCA', 'GRMN'],
 ['HP', 'HCP', 'NTAP', 'ORLY'],
 ['EQT', 'AIV', 'ITW', 'FTI', 'ECL', 'SJM', 'DXC', 'UAL'],
 ['AEE', 'GM', 'CME', 'ECL', 'CCL', 'IDXX'],
 ['HII', 'UNM', 'ULTA', 'LNC'],
 ['MET', 'INCY', 'DISCK', 'CTXS', 'APTV', 'ABT'],
 ['NCLH', 'DHI', 'UDR', 'NKE', 'TWX', 'EQT'],
 ['BA', 'PAYX', 'NAVI', 'HRS', 'ADP', 'NSC', 'KO', 'CNC'],
 ['GLW', 'KSS', 'AAP', 'REG', 'MA', 'AEE', 'CRM', 'SEE', 'AMD'],
 ['NDAQ', 'PDCO', 'QRVO', 'SYK', 'DWDP', 'NCLH'],
 ['XOM', 'CVS', 'IDXX', 'NDAQ', 'KHC', 'CNC', 'HAS', 'DWDP', 'MO'],
 ['SHW', 'FMC', 'APA', 'AWK', 'NWSA', 'DRI', 'GD'],
 ['CB', 'LUK', 'VZ', 'AEP', 'PYPL', 'SNPS'],
 ['NWL', 'CMS', 'EW', 'DFS', 'CFG', 'V', 'TSCO'],
 ['FBHS', 'RTN', 'WHR', 'PAYX', 'SNI', 'MAA', 'ANSS']

### 2.1.3 Generate fake stock position data
* 生成每个用户对不同股票的持仓比例

In [54]:
### sample position for one user, eg user holding 3 stocks
eg_positions = []
for i in range(3):
    eg_positions.append(random.uniform(0,1)) # 一个是上限一个是下限，随机生成三个数存到eg_position里 
[x/sum(eg_positions) for x in eg_positions] # loop eg_position里面的每一个数，然后分别除以总的加和算出百分比（得到用户对每支股票的评价）

[0.20843836724244552, 0.7104105564973157, 0.08115107626023892]

In [55]:
### build function to sample position, input is number of stocks holded, output is corresponding position 
# input：持仓的数量；output：持仓的百分比 
def sample_position(count):
    positions = []
    for i in range(count):
        positions.append(random.uniform(0,1))
    sum_val = sum(positions)
    return [x/sum_val for x in positions]

In [56]:
sample_position(4)

[0.2678249713208429,
 0.3351857318363914,
 0.156869515274312,
 0.24011978156845373]

In [57]:
num_stocks

[5,
 9,
 8,
 5,
 3,
 8,
 4,
 8,
 6,
 4,
 6,
 6,
 8,
 9,
 6,
 9,
 7,
 6,
 7,
 7,
 9,
 7,
 5,
 9,
 7,
 5,
 9,
 8,
 9,
 7,
 5,
 3,
 8,
 6,
 6,
 7,
 3,
 7,
 7,
 5,
 7,
 3,
 4,
 5,
 4,
 8,
 4,
 6,
 9,
 7,
 9,
 8,
 3,
 9,
 8,
 8,
 6,
 6,
 8,
 4,
 9,
 7,
 3,
 5,
 8,
 6,
 3,
 3,
 6,
 3,
 9,
 5,
 9,
 8,
 5,
 8,
 4,
 3,
 5,
 4,
 5,
 4,
 9,
 8,
 7,
 8,
 5,
 7,
 3,
 5,
 4,
 7,
 6,
 9,
 5,
 6,
 5,
 9,
 9,
 4,
 8,
 5,
 6,
 8,
 9,
 5,
 8,
 8,
 9,
 7,
 5,
 3,
 5,
 3,
 3,
 9,
 9,
 5,
 5,
 8,
 5,
 5,
 6,
 9,
 6,
 5,
 5,
 9,
 8,
 8,
 5,
 4,
 3,
 4,
 4,
 9,
 8,
 6,
 6,
 8,
 7,
 8,
 5,
 3,
 6,
 5,
 3,
 3,
 3,
 4,
 4,
 4,
 7,
 3,
 4,
 6,
 5,
 9,
 7,
 3,
 6,
 7,
 9,
 5,
 7,
 7,
 8,
 7,
 5,
 4,
 6,
 3,
 4,
 6,
 3,
 4,
 5,
 4,
 6,
 7,
 8,
 5,
 5,
 5,
 3,
 9,
 9,
 3,
 6,
 3,
 9,
 9,
 8,
 4,
 9,
 5,
 3,
 3,
 9,
 3,
 6,
 4,
 5,
 8,
 4,
 6,
 3,
 8,
 7,
 6,
 4,
 9,
 8,
 4,
 6,
 8,
 7,
 5,
 8,
 5,
 3,
 4,
 6,
 3,
 3,
 6,
 9,
 8,
 5,
 8,
 4,
 8,
 3,
 9,
 5,
 4,
 8,
 3,
 7,
 5,
 4,
 8,
 9,
 3,
 5,
 7,
 8,
 4,
 6,
 4,


In [58]:
### loop over every customer 
positions = []
for i in num_stocks: # num_stocks: 用户的持仓数 
    sampled_positions = sample_position(i) # 生成每个用户对自己持仓股票的评价 
    positions.append(sampled_positions) # 变成一个list of list

In [59]:
positions

[[0.21541517825655657,
  0.07790604918253159,
  0.1357758261527755,
  0.29826666765525595,
  0.2726362787528805],
 [0.06665281179398831,
  0.21513069337492327,
  0.013101187382550733,
  0.2737607953278701,
  0.16367392486787619,
  0.041384784049497185,
  0.16858644308054435,
  0.05694237728331288,
  0.0007669828394371504],
 [0.16957031997714564,
  0.06099691967666583,
  0.046877843773816125,
  0.07868926149469496,
  0.12991369618510484,
  0.19872267594346354,
  0.19021012593016115,
  0.125019157018948],
 [0.03294147761810073,
  0.16964416825895764,
  0.5018476657129806,
  0.028761278791880604,
  0.2668054096180803],
 [0.2802320887114289, 0.5777290436310801, 0.14203886765749107],
 [0.1955076064711491,
  0.08034489431665888,
  0.1351849011592912,
  0.12061654567094024,
  0.1705032970051379,
  0.030567921781598516,
  0.07580131650078233,
  0.19147351709444183],
 [0.14732485439861098,
  0.049114564549866596,
  0.4513632028808999,
  0.35219737817062247],
 [0.19906908865527187,
  0.070491713

## Part 3: Save data

In [61]:
from collections import defaultdict
a = defaultdict(dict)
a['陈宇婷']['胸围'] = 4
a['陈宇婷']['腰围'] = 6
a

defaultdict(dict, {'陈宇婷': {'胸围': 4, '腰围': 6}})

In [62]:
from collections import defaultdict
    
customer_dic = defaultdict(dict)
for customer, stock, position in zip(range(num_customer),stocks,positions):
    customer_dic[customer]["stocks"] = stock
    customer_dic[customer]['positions'] = position

In [63]:
import pickle
def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [65]:
save_obj(customer_dic, 'jupyter notebook/customer_data')

## Part 4: Modulize

In [66]:
class data_generator:
    from collections import defaultdict
    import pickle
    import random
    
    # 天生包含的数据 
    def __init__(self,stocksList=None,numHoldingList=None,customeSize=1000):
        self.customeSize = customeSize
        self.stocksList = stocksList
        self.numHoldingList = numHoldingList ### numHoldingList format: [3,4,5,6,7,8,9,10]
    
    def generate_num_stocks(self): # generate出来每个用户持仓的数量 
        self.num_stocks = []
        for i in range(num_customer):
            self.num_stocks += sample(self.numHoldingList,1)
            
    def generate_stocks(self): # 判断客户持有哪几支股票 
        self.stocks = []
        for customer,num_stock in zip(range(self.customeSize),self.num_stocks):
            sampled_stocks = sample(list(self.stocksList),num_stock) ### Sample num_stock of stocks from stocks list 
            self.stocks.append(sampled_stocks)
            
    def generate_positions(self):
        self.positions = []
        for i in self.num_stocks:
            sampled_positions = self.sample_position(i)
            self.positions.append(sampled_positions)
            
    def sample_position(self,count):
        positions = []
        for i in range(count):
            positions.append(random.uniform(0,1))
        sum_val = sum(positions)
        return [x/sum_val for x in positions]
    
    def save_as_dict(self):

        self.customer_dic = defaultdict(dict)
        for customer,stock, position in zip(list(range(self.customeSize)),self.stocks,self.positions):
            self.customer_dic[customer]["stocks"] = stock
            self.customer_dic[customer]['positions'] = position
        return self.customer_dic
    
    def generate_data(self):
        self.generate_num_stocks()
        self.generate_stocks()
        self.generate_positions()
        customerDic = self.save_as_dict()
        return customer_dic

有问题：'list' object is not callable

In [67]:
# 客户1000个，股票为股票列表，customer持仓数为3-10支 
generator = data_generator(customeSize=1000,stocksList=stocksList,numHoldingList=[3,4,5,6,7,8,9,10]) 
customer_data = generator.generate_data()

TypeError: 'list' object is not callable

In [68]:
customer_data

NameError: name 'customer_data' is not defined