In [5]:
# Importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [6]:
# Importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

In [7]:
# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [24]:
training_set

tensor([[0., 3., 4.,  ..., 0., 0., 0.],
        [4., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [5., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 5., 0.,  ..., 0., 0., 0.]])

In [8]:
# Getting the number of users and movies
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

In [9]:
training_set

array([[        1,         2,         3, 876893171],
       [        1,         3,         4, 878542960],
       [        1,         4,         3, 876893119],
       ..., 
       [      943,      1188,         3, 888640250],
       [      943,      1228,         3, 888640275],
       [      943,      1330,         3, 888692465]])

In [10]:
user_list=np.zeros((1, nb_movies))

In [11]:
len(training_set)

79999

In [12]:
moveis_rated_by_this_user=training_set[training_set[:, 0]==1]

In [13]:
movies_rated_by_this_user=training_set[:, 1][training_set[:, 0]==1]

In [14]:
movies_rated_by_this_user

array([  2,   3,   4,   5,   7,   8,   9,  11,  13,  15,  16,  18,  19,
        21,  22,  25,  26,  28,  29,  30,  32,  34,  35,  37,  38,  40,
        41,  42,  43,  45,  46,  48,  50,  52,  55,  57,  58,  59,  63,
        66,  68,  71,  75,  77,  79,  83,  87,  88,  89,  93,  94,  95,
        99, 101, 105, 106, 109, 110, 111, 115, 116, 119, 122, 123, 124,
       126, 127, 131, 133, 135, 136, 137, 138, 139, 141, 142, 144, 146,
       147, 149, 152, 153, 156, 158, 162, 165, 166, 167, 168, 169, 172,
       173, 176, 178, 179, 181, 182, 187, 191, 192, 194, 195, 197, 198,
       199, 203, 204, 205, 207, 211, 216, 217, 220, 223, 231, 234, 237,
       238, 239, 240, 244, 245, 246, 247, 249, 251, 256, 257, 261, 263,
       268, 269, 270, 271])

In [15]:
def conversionToArray(dataset):
    total_list=[]
    for id_user in range(1, nb_users + 1):
        user_list=np.zeros(nb_movies)
        movies_rated_by_this_user=dataset[:, 1][dataset[:, 0]==id_user]
        ratings_by_this_user=dataset[:, 2][dataset[:, 0]==id_user]
        print(movies_rated_by_this_user)

        user_list[movies_rated_by_this_user-1]=ratings_by_this_user
        total_list.append(list(user_list))
    return total_list

In [16]:
new_list=conversionToArray(training_set)

[  2   3   4   5   7   8   9  11  13  15  16  18  19  21  22  25  26  28
  29  30  32  34  35  37  38  40  41  42  43  45  46  48  50  52  55  57
  58  59  63  66  68  71  75  77  79  83  87  88  89  93  94  95  99 101
 105 106 109 110 111 115 116 119 122 123 124 126 127 131 133 135 136 137
 138 139 141 142 144 146 147 149 152 153 156 158 162 165 166 167 168 169
 172 173 176 178 179 181 182 187 191 192 194 195 197 198 199 203 204 205
 207 211 216 217 220 223 231 234 237 238 239 240 244 245 246 247 249 251
 256 257 261 263 268 269 270 271]
[  1  10  14  25 100 111 127 237 242 255 258 269 272 273 274 275 276 277
 278 282 283 284 285 286 287 288 289 291 293 294 295 296 300 302 304 305
 306 309 310 311]
[181 258 260 268 271 288 302 303 317 319 320 321 322 325 326 329 333 336
 338 339 340 342 344 346 347 352 353 355]
[ 11 210 258 271 300 301 324 327 328 329 358 359 360 362]
[ 21  25  29  50  63  66  70  95  99 101 105 121 135 145 151 162 163 168
 169 172 174 181 183 186 189 194 200 204 208 

 1472]
[258 286 294 300 326 328 333 877]
[  15   39   65   97  121  147  154  168  172  173  179  181  185  271  286
  288  293  294  298  475  484  546  748  781  928 1074 1135]
[ 258  270  288  307  333  334  350  748  881  882  891 1243]
[125 243 245 288 300 301 321 326 596 687]
[   1    7   40   52   55   56   58   65   66   68   69   70   72   82   86
   90   92   95   99  100  125  131  132  153  169  172  181  204  210  216
  219  234  235  255  269  278  283  288  294  338  367  369  385  402  411
  417  418  420  421  423  427  433  443  451  473  476  485  486  496  546
  559  567  582  596  609  631  650  655  660  699  727  735  747  785  955
  959  974 1013 1014 1035 1095 1135]
[   1   23   28   31   50   58   87   98  100  132  133  135  136  141  174
  180  181  194  196  199  202  204  222  234  250  258  260  271  300  315
  316  322  323  357  378  434  435  443  465  480  482  484  496  498  511
  515  520  527  543  588  602  622  646  662  690  699  732  879 1020 1

[   1    2    3    4    7   11   12   21   22   24   27   28   29   33   38
   41   43   49   50   51   56   62   63   64   66   67   68   69   71   72
   73   78   79   80   82   88   90   91   94   95   96   97   99  100  101
  105  109  117  118  120  121  122  123  125  132  135  140  141  143  150
  151  161  168  172  173  174  175  176  177  181  183  185  186  191  193
  195  196  200  202  204  208  210  214  215  216  217  218  222  226  227
  228  229  230  231  232  233  234  235  239  240  250  252  254  255  257
  258  260  264  265  271  288  294  313  318  323  338  343  355  356  358
  362  365  366  367  368  370  373  374  375  378  380  384  385  386  391
  392  393  395  400  401  402  403  404  405  411  416  417  418  419  420
  421  423  426  431  432  443  449  455  465  472  473  475  477  485  496
  501  540  541  546  548  549  550  552  554  559  561  562  566  567  568
  569  576  577  578  581  584  588  597  603  609  625  633  651  655  658
  660  665  

[ 13  29  50  69  82  97 124 137 210 231 252 257 258 269 271 272 283 286
 288 289 294 300 302 307 311 323 328 333 340 345 347 354 355 358 367 403
 475 538 554 596 752 882 895 896 902 993]
[  1   7  12  22  23  50  56  64  79  82  96  97 111 114 117 118 121 125
 127 132 133 150 151 168 169 172 174 175 181 183 187 190 191 192 194 195
 196 205 208 216 222 235 246 248 252 257 258 268 269 271 276 290 300 302
 310 313 323 324 326 329 340 347 348 354 355 357 408 427 462 474 475 479
 480 482 483 490 505 508 511 514 515 517 521 522 523 527 531 537 603 614
 647 649 654 657 664 749 879 898 902 915]
[   8   15   83   96   98  135  144  161  168  174  175  186  191  195  199
  222  228  232  245  417  419  474  479  480  482  483  484  485  511  515
  519  521  603  606  611  615  650  651  657  659  705  945  949  969 1099
 1299]
[   2    4    5    7   11   12   17   22   23   27   29   31   33   38   50
   53   54   55   56   62   66   68   79   82   88   89   96   98  117  118
  121  123  144  1

[   1    4    7    8    9   10   11   12   13   14   16   19   20   22   24
   26   28   30   39   45   47   48   49   50   52   53   55   56   58   59
   60   61   64   65   66   68   69   70   72   79   81   82   83   86   88
   89   90   96   98  100  113  116  124  127  129  134  135  137  144  147
  151  153  154  168  170  172  173  174  175  176  183  185  190  194  195
  197  198  199  202  204  207  208  209  210  211  212  213  216  222  224
  226  227  228  229  234  237  238  239  241  250  251  256  257  265  269
  270  271  273  275  276  277  279  283  285  286  289  302  304  306  311
  312  313  315  316  318  319  322  323  331  338  342  345  346  347  349
  354  355  367  372  382  384  385  386  387  396  399  403  405  407  408
  414  421  430  435  455  461  462  463  464  477  479  487  490  496  504
  506  511  512  513  514  515  516  517  519  523  529  530  531  549  550
  553  559  561  566  568  580  582  584  589  603  634  647  648  656  659
  661  665  

In [17]:
len(new_list)

943

In [18]:
total_list[0]

NameError: name 'total_list' is not defined

In [19]:
# Converting the data into an array with users in lines and movies in columns
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        print(id_movies.shape)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data
training_set_m = convert(training_set)
test_set_m = convert(test_set)

(134,)
(40,)
(28,)
(14,)
(91,)
(110,)
(213,)
(30,)
(12,)
(94,)
(105,)
(25,)
(373,)
(41,)
(60,)
(69,)
(19,)
(159,)
(10,)
(26,)
(95,)
(70,)
(88,)
(41,)
(41,)
(66,)
(10,)
(39,)
(17,)
(25,)
(23,)
(22,)
(14,)
(10,)
(17,)
(9,)
(37,)
(63,)
(13,)
(22,)
(31,)
(88,)
(112,)
(80,)
(29,)
(11,)
(16,)
(33,)
(107,)
(13,)
(16,)
(35,)
(16,)
(30,)
(10,)
(92,)
(54,)
(80,)
(215,)
(119,)
(12,)
(123,)
(48,)
(109,)
(48,)
(21,)
(12,)
(18,)
(38,)
(77,)
(22,)
(73,)
(34,)
(21,)
(44,)
(45,)
(39,)
(14,)
(27,)
(14,)
(30,)
(86,)
(78,)
(43,)
(159,)
(11,)
(113,)
(11,)
(40,)
(164,)
(50,)
(204,)
(9,)
(221,)
(157,)
(28,)
(35,)
(16,)
(63,)
(34,)
(38,)
(112,)
(15,)
(63,)
(16,)
(28,)
(13,)
(17,)
(134,)
(82,)
(12,)
(24,)
(31,)
(27,)
(49,)
(88,)
(36,)
(42,)
(98,)
(14,)
(46,)
(34,)
(29,)
(14,)
(90,)
(23,)
(11,)
(102,)
(15,)
(178,)
(15,)
(17,)
(18,)
(12,)
(20,)
(20,)
(24,)
(27,)
(15,)
(10,)
(65,)
(20,)
(15,)
(108,)
(185,)
(19,)
(10,)
(33,)
(25,)
(10,)
(166,)
(70,)
(13,)
(27,)
(11,)
(18,)
(27,)
(99,)
(69,)
(66,)
(34,)
(20,)
(13,)

(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)
(0,)


In [None]:
training_set1[0]

In [21]:
training_set=torch.FloatTensor(training_set_m)
test_set=torch.FloatTensor(test_set_m)

In [27]:
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1=nn.Linear(nb_movies, 20)
        self.fc2=nn.Linear(20, 10)
        self.fc3=nn.Linear(10, 20)
        self.fc4=nn.Linear(20, nb_movies)
        self.activation=nn.Sigmoid()
    def forward(self, x):
        x=self.activation(self.fc1(x))
        x=self.activation(self.fc2(x))
        x=self.activation(self.fc3(x))
        x=self.fc4(x)