In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
np.random.seed(1)

## User Class

In [12]:
class User:
    """Synthetic user profile described by integer-coded categorical features."""

    # (attribute name, number of categories) for every feature, in the same
    # order to_vector() emits them. Sizes follow the code tables documented
    # in __init__.
    FEATURE_SIZES = (
        ('age', 4),
        ('sex', 2),
        ('risk_taking', 3),
        ('company_interest', 11),
        ('industry_interest', 5),
    )

    def __init__(self, user_id, age, sex, risk_taking, company_interest, industry_interest):
        """[User Class]

        Args:
            user_id ([str]): [zero-padded id string, ex. '000124']
            age ([int]): [0: 20~29, 1:30~39, 2: 40~49, 3: 50~59]
            sex ([int]): [0: Male, 1: Female]
            risk_taking ([int]): [0: Low, 1: Mid, 2: High]
            company_interest ([int])
                : [0: Apple(AAPL), 1: Google(GOOG), 2: Amazon(AMZN), 3: Facebook(FB), 4: Microsoft(MSFT), 5: Visa(V),
                6: Tesla(TSLA), 7: JP Morgan Chase(JPM), 8: P&G(PG), 9: Nvidia(NVDA), 10: Pfizer(PFE)]
            industry_interest ([int])
                : [0: IT, 1: Healthcare, 2: Financials, 3: Energy, 4: Industrials]
                NOTE(review): this table was previously written 1-based (1..5), but the
                notebook samples this field with np.random.randint(5), i.e. 0..4, like
                every other field. Confirm the label mapping.
        """
        self.user_id = user_id
        self.age = age
        self.sex = sex
        self.risk_taking = risk_taking
        self.company_interest = company_interest
        self.industry_interest = industry_interest

    def to_vector(self):
        """Return ``[user_id, [age, sex, risk_taking, company_interest, industry_interest]]``.

        The result is also stored on the instance as ``self.user_vec``.
        """
        self.user_vec = [self.user_id,
                         [self.age,
                          self.sex,
                          self.risk_taking,
                          self.company_interest,
                          self.industry_interest]]
        return self.user_vec

    def to_one_hot_vector(self):
        """Return ``[user_id, one_hot]`` with every feature one-hot encoded.

        ``one_hot`` is a flat list of 0/1 ints built by concatenating one
        segment per feature, in ``FEATURE_SIZES`` order (4 + 2 + 3 + 11 + 5 = 25
        entries). Exactly one entry per segment is 1.

        Raises:
            ValueError: if a feature code lies outside ``0 <= code < size``.
        """
        one_hot = []
        for name, size in self.FEATURE_SIZES:
            code = getattr(self, name)
            # Reject out-of-range codes explicitly; a negative code would
            # otherwise index from the end of the segment and encode silently.
            if not 0 <= code < size:
                raise ValueError(
                    f"{name}={code!r} is outside the valid range 0..{size - 1}"
                )
            segment = [0] * size
            segment[code] = 1
            one_hot.extend(segment)
        return [self.user_id, one_hot]

    def __str__(self):
        """Return a multi-line, human-readable summary of the raw feature codes."""
        return f"[User info] \n id: {self.user_id} \n age: {self.age} \n sex: {self.sex} \n risk_taking: {self.risk_taking} \n company_interest: {self.company_interest} \n industry_interest:{self.industry_interest} \n"

In [13]:
# User Example: build one user from random feature codes.
# The draws stay in constructor-argument order (id, age, sex, risk_taking,
# company_interest, industry_interest) so the seeded random stream is consumed
# in the same sequence as a single inline User(...) call would.
user1_id = '%06d' % (np.random.random(1)[0] * 100000)
user1_age = np.random.randint(4)
user1_sex = np.random.randint(2)
user1_risk_taking = np.random.randint(3)
user1_company_interest = np.random.randint(10)
user1_industry_interest = np.random.randint(5)

# Features are passed as integer codes, not labels such as 'Male' or 'High'.
user1 = User(user1_id, user1_age, user1_sex, user1_risk_taking,
             user1_company_interest, user1_industry_interest)

print(user1)
print(user1.to_vector())


[User info] 
 id: 054881 
 age: 1 
 sex: 0 
 risk_taking: 1 
 company_interest: 3 
 industry_interest:2 

['054881', [1, 0, 1, 3, 2]]


## Article Class

In [14]:
class Article:
    """An article identified by an id and described by a list of keywords."""

    # Keyword vocabulary as [label, number] pairs. The list position is the
    # keyword index (0: Apple, 1: M&A, 2: Startup, 3: Samsung, 4: iPhone12, ...).
    # NOTE(review): the meaning of the number in each pair is not stated in
    # this notebook - confirm before relying on it.
    keyword_list = [
        ['Apple', 1],
        ['M&A', 2],
        ['Startup', 5],
        ['Samsung', 9],
        ['iPhone12', 1.1],
        ['Galaxy20', 9.1],
        ['R&D', 4.5],
        ['Coupang', 20],
        ['Pfizer', 100],
        ['COVID', 103],
    ]

    def __init__(self, article_id, keyword):
        """Store the article id and its keywords as given.

        Args:
            article_id: identifier of the article.
            keyword: keywords attached to the article; the notebook passes a
                list of entries taken from ``keyword_list``.
        """
        self.keyword = keyword
        self.article_id = article_id

    def __str__(self):
        """Return a multi-line, human-readable summary of the article."""
        return f'[Article Keywords] \n id: {self.article_id} \n keywords: {self.keyword} \n'

In [15]:
# Article Example: one article with a random id and three random keywords.
# The id is drawn first and the keywords afterwards, one randint per keyword,
# so the random stream is consumed in the same order as a single inline call.
article1_id = "%06d" % (np.random.random(1)[0] * 100000)
article1_keywords = [
    Article.keyword_list[np.random.randint(len(Article.keyword_list))]
    for _slot in range(3)  # keywords are drawn with replacement, repeats are possible
]
article1 = Article(article1_id, article1_keywords)
print(article1)

[Article Keywords] 
 id: 043758 
 keywords: [['R&D', 4.5], ['Pfizer', 100], ['Pfizer', 100]] 



## Generate User Datasets

In [16]:
# Build 100 random users. Arguments are evaluated left to right, so each user
# consumes the random stream as: id, age, sex, risk_taking, company_interest,
# industry_interest.
USERS = [
    User(
        '%06d' % (np.random.random(1)[0] * 100000),
        np.random.randint(4),
        np.random.randint(2),
        np.random.randint(3),
        np.random.randint(10),
        np.random.randint(5),
    )
    for _user_index in range(100)
]

print(f'USERS shape: {np.shape(USERS)} \n')

# Show every user as [user_id, [feature codes]].
for i, e in enumerate(USERS):
    print(e.to_vector())


USERS shape: (100,) 

['027265', [1, 0, 2, 8, 1]]
['092559', [1, 0, 1, 4, 3]]
['036824', [1, 0, 0, 2, 3]]
['079915', [3, 1, 2, 7, 0]]
['058201', [1, 1, 0, 4, 3]]
['018633', [2, 1, 0, 2, 0]]
['001878', [1, 1, 2, 8, 4]]
['038648', [0, 1, 2, 8, 1]]
['069763', [1, 1, 2, 7, 3]]
['021038', [3, 0, 2, 3, 4]]
['057019', [0, 1, 0, 6, 4]]
['065279', [0, 1, 0, 4, 4]]
['025329', [3, 0, 2, 7, 0]]
['013818', [1, 1, 0, 5, 0]]
['062284', [0, 0, 0, 3, 2]]
['050962', [0, 1, 1, 9, 0]]
['044171', [3, 0, 2, 9, 2]]
['088047', [2, 1, 2, 3, 4]]
['026538', [3, 0, 1, 1, 4]]
['092929', [3, 0, 2, 3, 0]]
['071632', [0, 0, 2, 3, 0]]
['016494', [1, 0, 0, 2, 3]]
['070373', [3, 1, 0, 8, 0]]
['057615', [0, 0, 2, 8, 4]]
['089155', [1, 0, 0, 4, 3]]
['069947', [0, 0, 1, 9, 3]]
['088110', [0, 1, 1, 9, 0]]
['072525', [2, 1, 1, 3, 1]]
['039486', [0, 1, 1, 6, 0]]
['038346', [1, 0, 0, 6, 3]]
['009237', [1, 1, 1, 6, 4]]
['096896', [0, 1, 2, 3, 2]]
['009725', [3, 0, 2, 5, 3]]
['010022', [0, 0, 2, 6, 3]]
['084114', [0, 1, 0, 2, 3]

In [17]:
# Inspect a single generated user: readable summary first, then the raw vector.
sample_user = USERS[10]
print(sample_user)
print(sample_user.to_vector())

[User info] 
 id: 057019 
 age: 0 
 sex: 1 
 risk_taking: 0 
 company_interest: 6 
 industry_interest:4 

['057019', [0, 1, 0, 6, 4]]


In [18]:
# Keep only the feature codes (second element of to_vector()) for USERS[10].
sample_user_vector = USERS[10].to_vector()
user1_to_vec = sample_user_vector[1]
print(user1_to_vec)
# Not yet enabled: one-hot encoding of the feature codes with scikit-learn.
# enc = OneHotEncoder(handle_unknown='ignore')
# enc.fit(user1_to_vec)

[0, 1, 0, 6, 4]


## Generate Article Datasets

In [19]:
# Build 100 random articles. For each article the id is drawn first, then three
# keywords (with replacement), so the random stream is consumed in that order.
ARTICLES = [
    Article(
        "%06d" % (np.random.random(1)[0] * 100000),
        [
            Article.keyword_list[np.random.randint(len(Article.keyword_list))]
            for _slot in range(3)
        ],
    )
    for _article_index in range(100)
]

print(f'ARTICLES shape: {np.shape(ARTICLES)} \n')

# Show the readable summary of every article.
for i, e in enumerate(ARTICLES):
    print(e)


ARTICLES shape: (100,) 

[Article Keywords] 
 id: 083151 
 keywords: [['Coupang', 20], ['Galaxy20', 9.1], ['M&A', 2]] 

[Article Keywords] 
 id: 036810 
 keywords: [['R&D', 4.5], ['Pfizer', 100], ['Coupang', 20]] 

[Article Keywords] 
 id: 024606 
 keywords: [['Samsung', 9], ['Startup', 5], ['COVID', 103]] 

[Article Keywords] 
 id: 042783 
 keywords: [['Samsung', 9], ['Startup', 5], ['Galaxy20', 9.1]] 

[Article Keywords] 
 id: 036054 
 keywords: [['M&A', 2], ['Galaxy20', 9.1], ['Pfizer', 100]] 

[Article Keywords] 
 id: 073868 
 keywords: [['Pfizer', 100], ['iPhone12', 1.1], ['M&A', 2]] 

[Article Keywords] 
 id: 025723 
 keywords: [['M&A', 2], ['Startup', 5], ['M&A', 2]] 

[Article Keywords] 
 id: 090494 
 keywords: [['Coupang', 20], ['Galaxy20', 9.1], ['Apple', 1]] 

[Article Keywords] 
 id: 022826 
 keywords: [['M&A', 2], ['M&A', 2], ['R&D', 4.5]] 

[Article Keywords] 
 id: 068976 
 keywords: [['Apple', 1], ['Startup', 5], ['Samsung', 9]] 

[Article Keywords] 
 id: 011381 
 keywor