<a href="https://colab.research.google.com/github/chw8207/fastai_pytorch/blob/main/%ED%98%91%EC%97%85%ED%95%84%ED%84%B0%EB%A7%81.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from fastai.collab import *
from fastai.tabular.all import *
import pandas as pd
import numpy as np

### GPU 설정

In [2]:
# Ask TensorFlow for the GPU device name and stop the notebook early unless
# it is exactly the first GPU ('/device:GPU:0').
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [3]:
# NOTE(review): `tensorflow.python.client` looks like a TensorFlow-internal
# namespace rather than the public API — confirm it is still supported.
from tensorflow.python.client import device_lib

# Print every device TensorFlow reports; the recorded output below shows one
# CPU entry and one GPU entry.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2581778352109154540
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 40129593344
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4892161052945829681
physical_device_desc: "device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:00:04.0, compute capability: 8.0"
xla_global_id: 416903419
]


### 데이터 : 무비렌즈

In [4]:
# Fetch the MovieLens 100k dataset through fastai's `untar_data`; `path` is the
# directory that later cells read `u.data` and `u.item` from.
path = untar_data(URLs.ML_100k)

In [5]:
# Open the tab-separated ratings table (no header row in the file) and look at
# its first rows.
# NOTE(review): 'timestap' looks like a typo for 'timestamp'; the name is kept
# as-is because the recorded outputs (and possibly later cells) use it.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestap'],
)
ratings.head()

Unnamed: 0,user,movie,rating,timestap
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [6]:
# Hand-picked factors for the latest Star Wars film, in the order:
# sci-fi, action, classic (old) movie.
rise_skywalker = np.array((0.98, 0.9, -0.9))

In [7]:
# Factors for a user who likes modern sci-fi movies.
user1 = np.array((0.9, 0.8, -0.6))

In [8]:
# Similarity between this user and the movie: multiply the two factor vectors
# element-wise and add up the products.
np.sum(user1 * rise_skywalker)

2.1420000000000003

In [9]:
# Factors for the movie Casablanca (same three-factor layout as above).
casablanca = np.array((-0.99, -0.3, 0.8))

In [10]:
# Similarity between the same user and Casablanca, computed the same way:
# element-wise product followed by a sum.
np.sum(user1 * casablanca)

-1.611

### DataLoaders 만들기

In [11]:
# Read only the first two '|'-separated columns of u.item and name them
# 'movie' and 'title'; the file is decoded as latin-1 and has no header row.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie', 'title'],
)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [12]:
# Join the movie titles onto the ratings table. No key is given, so pandas
# merges on the column(s) the two frames have in common.
ratings = pd.merge(ratings, movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestap,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [14]:
# Build the CollabDataLoaders object from the ratings table, using the 'title'
# column as the item and a batch size of 64, then preview one batch.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

Unnamed: 0,user,title,rating
0,683,FairyTale: A True Story (1997),3
1,747,Maverick (1994),4
2,334,My Fair Lady (1964),3
3,64,"Bridge on the River Kwai, The (1957)",4
4,763,Contact (1997),3
5,648,Annie Hall (1977),2
6,712,True Lies (1994),5
7,63,"Fifth Element, The (1997)",5
8,405,Fantasia (1940),3
9,881,"Blues Brothers, The (1980)",3


In [16]:
# Sizes needed to represent the movie and user latent-factor tables as simple
# matrices: the number of user classes, the number of title classes, and the
# number of latent factors per entry.
n_users, n_movies = len(dls.classes['user']), len(dls.classes['title'])
n_factors = 5

In [17]:
# Randomly initialised latent-factor matrices: one (n_users, n_factors) matrix
# for users and one (n_movies, n_factors) matrix for movies.
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)

In [18]:
# Multiply the (transposed) user latent-factor matrix by a one-hot encoded
# vector that represents index 3.
one_hot_3 = one_hot(3, n_users).float()
torch.matmul(user_factors.t(), one_hot_3)

tensor([-0.4128,  0.6499,  0.8273, -0.9481,  0.9508])

In [19]:
# Direct lookup of the vector stored at index 3 of the matrix; the recorded
# output is identical to the one-hot product in the previous cell.
user_factors[3]

tensor([-0.4128,  0.6499,  0.8273, -0.9481,  0.9508])

### 밑바닥부터 만드는 협업 필터링

In [20]:
# A first try at writing a class.
class Example:
    """Minimal class: remembers one value and uses it in a greeting."""

    def __init__(self, a):
        """Store `a` on the instance for later use by `say`."""
        self.a = a

    def say(self, x):
        """Return the greeting 'Hello <a>, <x>' built from the stored value and `x`."""
        greeting = f'Hello {self.a}, {x}'
        return greeting

In [21]:
ex = Example('Sylvain')
ex.say('nice to meet you')

'Hello Sylvain, nice to meet you'

In [None]:
# Model that computes the dot product of user and movie factors.
class DotProduct(Module):
    """Score each (user, movie) pair as the dot product of their embeddings."""

    def __init__(self, n_users, n_movies, n_factors):
        """Create one embedding table for users and one for movies, each with
        `n_factors` values per row."""
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        """Return one score per row of `x`.

        Column 0 of `x` is looked up in the user embedding and column 1 in the
        movie embedding; the two results are multiplied element-wise and
        summed over dim 1.
        Assumes `x` is a 2-column batch of indices like the one returned by
        `dls.one_batch()` in this notebook — TODO confirm for other callers.
        """
        user_idx, movie_idx = x[:, 0], x[:, 1]
        user_vecs = self.user_factors(user_idx)
        movie_vecs = self.movie_factors(movie_idx)
        return (user_vecs * movie_vecs).sum(dim=1)

In [23]:
# Pull a single batch from the DataLoaders and check the shape of its inputs.
x,y = dls.one_batch()
x.shape

torch.Size([64, 2])