## Numpy

In [1]:
import numpy as np

### Create an array

In [2]:
# create an array from list
arr_from_list = np.array([1, 2, 3, 4, 5])
print('Array from list:', arr_from_list)

Array from list: [1 2 3 4 5]


In [3]:
# create an array with 0s
arr_zeros = np.zeros((3, 3))
print('Array of zeros:\n', arr_zeros)

Array of zeros:
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### Slicing and indexing

In [4]:
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Original array:\n", arr)

Original array:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]


In [5]:
# 단일 원소에 접근
print("Element at index (0, 1):", arr[0, 1])

Element at index (0, 1): 2


In [6]:
# 원소들로 구성된 슬라이스에 접근
print("Slice of elements:\n", arr[1:, :2])

Slice of elements:
 [[4 5]
 [7 8]]


### np.where

In [7]:
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [8]:
indices = np.where(arr > 4)
print("Indices of elements greater than 4:", indices)

Indices of elements greater than 4: (array([1, 1, 2, 2, 2]), array([1, 2, 0, 1, 2]))


### np.nonzero

이 함수는 sparse한 데이터를 처리할 때 유용하며, 0이 아닌 원소에만 집중할 수 있음

In [9]:
sparse_data = np.array([0, 0, 1, 0, 2, 0, 3])
nonzero_indices = np.nonzero(sparse_data)
print('Indices of non-zero elements:', nonzero_indices)

Indices of non-zero elements: (array([2, 4, 6]),)


### np.isnan

In [10]:
data_with_nan = np.array([1.0, np.nan, 3.0, np.nan, 5.0])
isnan_mask = np.isnan(data_with_nan)
print("Original array:", arr)

Original array: [[1 2 3]
 [4 5 6]
 [7 8 9]]


### Reshaping arrays

In [11]:
arr = np.array([1, 2, 3, 4, 5, 6])
print("Original array:", arr)

Original array: [1 2 3 4 5 6]


In [12]:
arr.shape

(6,)

In [13]:
reshaped_arr = arr.reshape((2, 3))
print("Reshaped array:\n", reshaped_arr)

Reshaped array:
 [[1 2 3]
 [4 5 6]]


In [14]:
reshaped_arr = arr.reshape((3, 2))
print("Reshaped array:\n", reshaped_arr)

Reshaped array:
 [[1 2]
 [3 4]
 [5 6]]


In [15]:
reshaped_arr = arr.reshape((1, -1))
print("Reshaped array:\n", reshaped_arr)
reshaped_arr.shape

Reshaped array:
 [[1 2 3 4 5 6]]


(1, 6)

### Array broadcasting

In [16]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

# summation of arrays

broadcasted_sum = arr1 + arr2
print("Broadcasted sum:\n", broadcasted_sum)

Broadcasted sum:
 [[2 3 4]
 [3 4 5]
 [4 5 6]]


In [17]:
arr1 = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
arr2 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])

# summation of arrays

broadcasted_sum = arr1 + arr2
print("Broadcasted sum:\n", broadcasted_sum)

Broadcasted sum:
 [[2 3 4]
 [3 4 5]
 [4 5 6]]


### np.dot

In [19]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[1, 2,], [3, 4], [5, 6]])

In [20]:
display(arr1)
display(arr2)

array([[1, 2, 3],
       [4, 5, 6]])

array([[1, 2],
       [3, 4],
       [5, 6]])

In [21]:
dot_product = np.dot(arr1, arr2)
print("Dot product:\n", dot_product)

Dot product:
 [[22 28]
 [49 64]]


### np.sort and np.argsort

In [22]:
ratings = np.array([3.2, 4.5, 2.8, 4.7, 3.9])

In [24]:
sorted_ratings = np.sort(ratings)
print("Sorted ratings:", sorted_ratings)

Sorted ratings: [2.8 3.2 3.9 4.5 4.7]


In [25]:
sorted_indices = np.argsort(ratings)
print("Indices of sorted ratings:", sorted_indices)

Indices of sorted ratings: [2 0 4 1 3]


In [26]:
sorted_indices[::-1]

array([3, 1, 4, 0, 2])

###  np.sum and np.mean

In [27]:
ratings = np.array([3.2, 4.5, 2.8, 4.7, 3.9])

In [28]:
# sum
total_ratings = np.sum(ratings)
print("Total ratings:", total_ratings)

Total ratings: 19.099999999999998


In [29]:
# average
mean_rating = np.mean(ratings)
print("Mean rating:", mean_rating)

Mean rating: 3.8199999999999994


### np.unique

In [30]:
user_ids = np.array([1, 2, 2, 3, 4, 4, 5, 5, 5])
unique_users = np.unique(user_ids)
print("Num of unique users:", unique_users)

Num of unique users: [1 2 3 4 5]


## Pandas

In [31]:
import pandas as pd

### Creating a DataFrame

In [32]:
# create a DataFrame form a dictionary
data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(data)
display('DataFrame from a dictionary:\n', df)

'DataFrame from a dictionary:\n'

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


### Rename columns

In [34]:
df = df.rename(columns={'A': 'Column_A', 'B': 'Column_B','C': 'Column_C'})
display("Renamed columns\n:", df)

'Renamed columns\n:'

Unnamed: 0,Column_A,Column_B,Column_C
0,1,4,7
1,2,5,8
2,3,6,9


### Selecting columns

In [None]:
selected_columns = df[['Column_A', 'Column_C']]
display("Selected columns:\n", selected_columns)

'Selected columns:\n'

Unnamed: 0,Column_A,Column_C
0,1,7
1,2,8
2,3,9


### Filtering rows

In [36]:
df['Column_A'] > 1

0    False
1     True
2     True
Name: Column_A, dtype: bool

In [37]:
filtered_rows = df[df['Column_A'] > 1]
display("Filtered rows:\n", filtered_rows)

'Filtered rows:\n'

Unnamed: 0,Column_A,Column_B,Column_C
1,2,5,8
2,3,6,9


In [38]:
# using the loc[] function with a boolean condition
filtered_rows = df.loc[df['Column_A'] > 1]
display("Filtered row:\n", filtered_rows)

'Filtered row:\n'

Unnamed: 0,Column_A,Column_B,Column_C
1,2,5,8
2,3,6,9


### Reindexing

In [39]:
new_index = [2, 0, 1]
reindexed_df = df.reindex(new_index)
display('Reindexed DataFrame:\n', reindexed_df)

'Reindexed DataFrame:\n'

Unnamed: 0,Column_A,Column_B,Column_C
2,3,6,9
0,1,4,7
1,2,5,8


### Handling missing values

In [None]:
# add a row with missing values
df.loc[3] = {'Column_A': None, 'Column_B': 10, 'Column_C': None}
display('DataFrame with missing value:\n', df)

'DataFrame with missing value:\n'

Unnamed: 0,Column_A,Column_B,Column_C
0,1.0,4,7.0
1,2.0,5,8.0
2,3.0,6,9.0
3,,10,


In [41]:
# delete the rows with missing values

df_no_missing = df.dropna()
display("DataFrame without missing values:\n", df_no_missing)

'DataFrame without missing values:\n'

Unnamed: 0,Column_A,Column_B,Column_C
0,1,4,7
1,2,5,8
2,3,6,9


In [42]:
# fill missing values
df_filled = df.fillna(0)
display("DataFrame with filled missing values:\n", df_filled)

  df_filled = df.fillna(0)


'DataFrame with filled missing values:\n'

Unnamed: 0,Column_A,Column_B,Column_C
0,1,4,7
1,2,5,8
2,3,6,9
3,0,10,0


### Grouped data

In [43]:
# add a 'category' column
df['Category'] = ['A', 'B', 'A', 'B']
display("DataFrame with category column:\n", df)

# groupby with 'Category' and take mean values
grouped = df.groupby('Category').mean()
display("Grouped data:\n", grouped)

'DataFrame with category column:\n'

Unnamed: 0,Column_A,Column_B,Column_C,Category
0,1.0,4,7.0,A
1,2.0,5,8.0,B
2,3.0,6,9.0,A
3,,10,,B


'Grouped data:\n'

Unnamed: 0_level_0,Column_A,Column_B,Column_C
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,2.0,5.0,8.0
B,2.0,7.5,8.0


### Merging DataFrames

In [44]:
data1 = {'ID': [1, 2, 3], 'Value1': [4, 5, 6]}
data2 = {'ID': [1, 2, 3], 'Value2': [7, 8, 9]}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

merged_df = df1.merge(df2, on='ID')
display("Merged DataFrame:\n", merged_df)

'Merged DataFrame:\n'

Unnamed: 0,ID,Value1,Value2
0,1,4,7
1,2,5,8
2,3,6,9


### Sorting data

In [45]:
data = {'A': [4, 2, 1, 3], 'B': [8, 5, 6, 7]}
df = pd.DataFrame(data)
display("Original DataFrame:\n", df)

'Original DataFrame:\n'

Unnamed: 0,A,B
0,4,8
1,2,5
2,1,6
3,3,7


In [46]:
# A 열로 데이터프레임 정렬
sorted_df = df.sort_values(by='A')
display("Sorted DataFrame:\n", sorted_df)

'Sorted DataFrame:\n'

Unnamed: 0,A,B
2,1,6
1,2,5
3,3,7
0,4,8


### Applying functions to columns

In [47]:
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# 'A' 열에 제곱 함수 적용
df['A_squared'] = df['A'].apply(lambda x: x**2)
display("DataFrame with squared 'A' column:\n", df)

"DataFrame with squared 'A' column:\n"

Unnamed: 0,A,B,A_squared
0,1,4,1
1,2,5,4
2,3,6,9


### Concatenating DataFrames

In [48]:
data1 = {'A': [1, 2, 3], 'B': [4, 5, 6]}
data2 = {'A': [4, 5, 6], 'B': [7, 8, 9]}
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

In [49]:
concatenated_df = pd.concat([df1, df2], ignore_index=True)
display("Concatenate DataFrame:\n", concatenated_df)

'Concatenate DataFrame:\n'

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
3,4,7
4,5,8
5,6,9


### Pivot tables

In [51]:
# user-item sample log data
data = {
    'user_id': [1, 1, 1, 2, 2, 2, 3, 3, 4, 4],
    'item_id': [1, 2, 3, 1, 4, 5, 3, 4, 2, 5],
    'rating': [4, 5, 3, 2, 4, 5, 3, 2, 4, 5]
}

rating_log = pd.DataFrame(data)
display("Rating log DataFrame:\n", rating_log)

# 평점 데이터프레임을 pivot으로 만듦
pivot_table = rating_log.pivot_table(index='user_id', columns='item_id', values='rating', fill_value=0)
display("\nPivot table:\n", pivot_table)

'Rating log DataFrame:\n'

Unnamed: 0,user_id,item_id,rating
0,1,1,4
1,1,2,5
2,1,3,3
3,2,1,2
4,2,4,4
5,2,5,5
6,3,3,3
7,3,4,2
8,4,2,4
9,4,5,5


'\nPivot table:\n'

item_id,1,2,3,4,5
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,4.0,5.0,3.0,0.0,0.0
2,2.0,0.0,0.0,4.0,5.0
3,0.0,0.0,3.0,2.0,0.0
4,0.0,4.0,0.0,0.0,5.0


### Addng and dropping columns

In [52]:
data = {'A': [1, 2, 4], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# 'A'와 'B' 열의 합인 새로운 열 'C' 추가
df['C'] = df['A'] + df['B']
display("DataFrame with added columns 'C':\n", df)

# delete 'C'
df = df.drop(columns=['C'])
display("DataFrame with dropped column 'C':\n", df)

"DataFrame with added columns 'C':\n"

Unnamed: 0,A,B,C
0,1,4,5
1,2,5,7
2,4,6,10


"DataFrame with dropped column 'C':\n"

Unnamed: 0,A,B
0,1,4
1,2,5
2,4,6


### Examples

In [56]:
# load the data
books = pd.read_csv('./data/03_BX-Books.csv')
users = pd.read_csv('./data/02_BX-Users.csv')
ratings = pd.read_csv('./data/01_BX-Book-Ratings.csv')

  books = pd.read_csv('./data/03_BX-Books.csv')


In [57]:
# merge the data
data = pd.merge(pd.merge(ratings, books, on='ISBN'), users, on='User-ID')

# filter books with more than 50 ratings
books_rating_counts = data['ISBN'].value_counts()
popular_books = books_rating_counts[books_rating_counts > 50].index
filtered_data = data[data['ISBN'].isin(popular_books)]
filtered_data

Unnamed: 0.1,Unnamed: 0_x,User-ID,ISBN,Book-Rating,Unnamed: 0_y,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Unnamed: 0,Location,Age
0,0,276725,034545104X,0,2966,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,276724,"tyler, texas, usa",
2,2,276727,0446520802,0,11053,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,276726,"h, new south wales, australia",16.0
6,8,276744,038550120X,7,9294,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,276743,"torrance, california, usa",
7,10,276746,0425115801,0,2030,Lightning,Dean R. Koontz,1996,Berkley Publishing Group,http://images.amazon.com/images/P/0425115801.0...,http://images.amazon.com/images/P/0425115801.0...,http://images.amazon.com/images/P/0425115801.0...,276745,"fort worth, ,",
8,11,276746,0449006522,0,227,Manhattan Hunt Club,JOHN SAUL,2002,Ballantine Books,http://images.amazon.com/images/P/0449006522.0...,http://images.amazon.com/images/P/0449006522.0...,http://images.amazon.com/images/P/0449006522.0...,276745,"fort worth, ,",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1031118,1149761,276704,0345386108,6,11885,Winter Moon,Dean R. Koontz,1995,Ballantine Books,http://images.amazon.com/images/P/0345386108.0...,http://images.amazon.com/images/P/0345386108.0...,http://images.amazon.com/images/P/0345386108.0...,276703,"cedar park, texas, usa",
1031124,1149767,276704,0446353957,0,14196,Mirror Image,Sandra Brown,1990,Warner Books,http://images.amazon.com/images/P/0446353957.0...,http://images.amazon.com/images/P/0446353957.0...,http://images.amazon.com/images/P/0446353957.0...,276703,"cedar park, texas, usa",
1031125,1149768,276704,0446605409,0,6262,Plum Island,Nelson DeMille,1998,Warner Books,http://images.amazon.com/images/P/0446605409.0...,http://images.amazon.com/images/P/0446605409.0...,http://images.amazon.com/images/P/0446605409.0...,276703,"cedar park, texas, usa",
1031128,1149771,276704,0743211383,7,881,Dreamcatcher,Stephen King,2001,Scribner,http://images.amazon.com/images/P/0743211383.0...,http://images.amazon.com/images/P/0743211383.0...,http://images.amazon.com/images/P/0743211383.0...,276703,"cedar park, texas, usa",


In [58]:
# filter users who have rated more than 10 books
user_rating_counts = filtered_data['User-ID'].value_counts()
active_users = user_rating_counts[user_rating_counts > 10].index
filtered_data = filtered_data[filtered_data['User-ID'].isin(active_users)]
filtered_data

Unnamed: 0.1,Unnamed: 0_x,User-ID,ISBN,Book-Rating,Unnamed: 0_y,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Unnamed: 0,Location,Age
596,795,277042,0060505885,0,9216,The Scandalous Summer of Sissy LeBlanc : A Novel,Loraine Despres,2002,Perennial,http://images.amazon.com/images/P/0060505885.0...,http://images.amazon.com/images/P/0060505885.0...,http://images.amazon.com/images/P/0060505885.0...,277041,"hiram, georgia, usa",
597,796,277042,0061097101,0,12714,The Saving Graces: A Novel,Patricia Gaffney,2000,HarperTorch,http://images.amazon.com/images/P/0061097101.0...,http://images.amazon.com/images/P/0061097101.0...,http://images.amazon.com/images/P/0061097101.0...,277041,"hiram, georgia, usa",
599,798,277042,0312283709,8,4282,Running with Scissors,Augusten Burroughs,2002,St. Martin's Press,http://images.amazon.com/images/P/0312283709.0...,http://images.amazon.com/images/P/0312283709.0...,http://images.amazon.com/images/P/0312283709.0...,277041,"hiram, georgia, usa",
600,799,277042,0312983271,0,6196,Full House (Janet Evanovich's Full Series),Janet Evanovich,2002,St. Martin's Paperbacks,http://images.amazon.com/images/P/0312983271.0...,http://images.amazon.com/images/P/0312983271.0...,http://images.amazon.com/images/P/0312983271.0...,277041,"hiram, georgia, usa",
601,800,277042,0380731851,0,2380,Mystic River,Dennis Lehane,2002,HarperTorch,http://images.amazon.com/images/P/0380731851.0...,http://images.amazon.com/images/P/0380731851.0...,http://images.amazon.com/images/P/0380731851.0...,277041,"hiram, georgia, usa",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1031072,1149714,276688,0553575090,7,62414,Deception on His Mind,ELIZABETH GEORGE,1998,Bantam,http://images.amazon.com/images/P/0553575090.0...,http://images.amazon.com/images/P/0553575090.0...,http://images.amazon.com/images/P/0553575090.0...,276687,"fort lee, new jersey, usa",
1031073,1149715,276688,0553575104,6,8649,In Pursuit of the Proper Sinner,Elizabeth George,2000,Bantam Books,http://images.amazon.com/images/P/0553575104.0...,http://images.amazon.com/images/P/0553575104.0...,http://images.amazon.com/images/P/0553575104.0...,276687,"fort lee, new jersey, usa",
1031082,1149724,276688,0679459618,0,56909,Dr. Death: A Novel,Jonathan Kellerman,2000,Random House Trade,http://images.amazon.com/images/P/0679459618.0...,http://images.amazon.com/images/P/0679459618.0...,http://images.amazon.com/images/P/0679459618.0...,276687,"fort lee, new jersey, usa",
1031084,1149726,276688,0679751521,0,5869,Midnight in the Garden of Good and Evil,John Berendt,1999,Vintage Books USA,http://images.amazon.com/images/P/0679751521.0...,http://images.amazon.com/images/P/0679751521.0...,http://images.amazon.com/images/P/0679751521.0...,276687,"fort lee, new jersey, usa",


In [59]:
# create a user-item matrix
user_item_matrix = filtered_data.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)

# compute the average rating for each book
average_ratings = filtered_data.groupby('ISBN')['Book-Rating'].mean().sort_values(ascending=True)

# get the top 10 highest-rated books
top_10_books = average_ratings.head(10).index
book_titles = books.set_index('ISBN').loc[top_10_books]['Book-Title']

print('Top 10 highest-rated books:')
display(book_titles)

Top 10 highest-rated books:


ISBN
3257229534                                  Der Vorleser
0345345738                             Women in His Life
0061030430                              Long Time No See
0061092045                           Honor Among Thieves
0515113328              The Cat Who Went into the Closet
1551664348                                       Montana
0971880107                                   Wild Animus
0425173534                                           Ssn
1401088945                        Ground Zero and Beyond
0425168220    State of Siege (Tom Clancy's Op-Center, 6)
Name: Book-Title, dtype: object

In [60]:
books.head()

Unnamed: 0.1,Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [63]:
# example 1: filter books that have the word "Harry Porter" in the title
harry_potter_books = books[books['Book-Title'].str.contains('Harry Potter')]
display("Harry Porter books:\n", harry_potter_books[['Book-Title', 'Book-Author']])

'Harry Porter books:\n'

Unnamed: 0,Book-Title,Book-Author
821,The Sorcerer's Companion: A Guide to the Magic...,ALLAN ZOLA KRONZEK
2143,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling
2809,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling
3459,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling
3839,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling
...,...,...
247872,Harry Potter Fun Book,Warner Bros.
251233,Harry Potter y El Caliz de Fuego,J. K. Rowling
256403,Harry Potter and the Bible: The Menace Behind...,Richard Abanes
257263,Harry Potter and the Goblet of Fire (Harry Pot...,J. K. Rowling


In [64]:
users

Unnamed: 0.1,Unnamed: 0,User-ID,Location,Age
0,0,1,"nyc, new york, usa",
1,1,2,"stockton, california, usa",18.0
2,2,3,"moscow, yukon territory, russia",
3,3,4,"porto, v.n.gaia, portugal",17.0
4,4,5,"farnborough, hants, united kingdom",
...,...,...,...,...
278853,278853,278854,"portland, oregon, usa",
278854,278854,278855,"tacoma, washington, united kingdom",50.0
278855,278855,278856,"brampton, ontario, canada",
278856,278856,278857,"knoxville, tennessee, usa",


In [65]:
# Example 2: Filter users from a specific country, e.g., 'USA'
usa_users = users[users['Location'].str.contains('USA', case=False)]
display('Users from USA:\n', usa_users.head())

'Users from USA:\n'

Unnamed: 0.1,Unnamed: 0,User-ID,Location,Age
0,0,1,"nyc, new york, usa",
1,1,2,"stockton, california, usa",18.0
5,5,6,"santa monica, california, usa",61.0
6,6,7,"washington, dc, usa",
8,8,9,"germantown, tennessee, usa",


In [66]:
# Example 3: Filter books by a specific author, e.g., 'J.K Rowling'
jk_rowling_books = books[books['Book-Author'] == 'J.K. Rowling']
display('J.K Rowling books:\n', jk_rowling_books['Book-Title'])

'J.K Rowling books:\n'

35086     Harry Potter and the Prisoner of Azkaban (Harr...
50434     Harry Potter and the Philosopher's Stone (Cove...
107936    Harry Potter and the Philosopher's Stone (Cove...
110431                  Harry Potter y la Ã?rden del FÃ©nix
145529    Harry Potter et l'Ordre du PhÃ©nix (Harry Pott...
200073             Harry Potter and the Philosopher's Stone
205143             Harry Potter and the Philosopher's Stone
214147                                  I El Pres D'askaban
224187                                 I La Pedra Filosofal
259611                Harry Potter and the Sorcerer's Stone
Name: Book-Title, dtype: object

In [67]:
# example 4. Filter books published after a certain year, e.g., 2000
books['Year-Of-Publication'] = pd.to_numeric(books['Year-Of-Publication'], errors='coerce')
books_after_2000 = books[books['Year-Of-Publication'] > 2000]
display("Books published after 2000:\n", books_after_2000)

'Books published after 2000:\n'

Unnamed: 0.1,Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0,0195153448,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,1,0002005018,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
9,9,074322678X,Where You'll Find Me: And Other Stories,Ann Beattie,2002.0,Scribner,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...,http://images.amazon.com/images/P/074322678X.0...
12,12,0887841740,The Middle Stories,Sheila Heti,2004.0,House of Anansi Press,http://images.amazon.com/images/P/0887841740.0...,http://images.amazon.com/images/P/0887841740.0...,http://images.amazon.com/images/P/0887841740.0...
21,21,1841721522,New Vegetarian: Bold and Beautiful Recipes for...,Celia Brooks Brown,2001.0,Ryland Peters &amp; Small Ltd,http://images.amazon.com/images/P/1841721522.0...,http://images.amazon.com/images/P/1841721522.0...,http://images.amazon.com/images/P/1841721522.0...
...,...,...,...,...,...,...,...,...,...
271349,271349,0520242335,Strong Democracy : Participatory Politics for ...,Benjamin R. Barber,2004.0,University of California Press,http://images.amazon.com/images/P/0520242335.0...,http://images.amazon.com/images/P/0520242335.0...,http://images.amazon.com/images/P/0520242335.0...
271350,271350,0762412119,"Burpee Gardening Cyclopedia: A Concise, Up to ...",Allan Armitage,2002.0,Running Press Book Publishers,http://images.amazon.com/images/P/0762412119.0...,http://images.amazon.com/images/P/0762412119.0...,http://images.amazon.com/images/P/0762412119.0...
271351,271351,1582380805,Tropical Rainforests: 230 Species in Full Colo...,"Allen M., Ph.D. Young",2001.0,Golden Guides from St. Martin's Press,http://images.amazon.com/images/P/1582380805.0...,http://images.amazon.com/images/P/1582380805.0...,http://images.amazon.com/images/P/1582380805.0...
271352,271352,1845170423,Cocktail Classics,David Biggs,2004.0,Connaught,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...


In [70]:
# example 5: group the ratings data by the user's country and compute the average rating
data_with_country = pd.merge(ratings, users, on='User-ID')
average_ratings_by_country = data_with_country.groupby(data_with_country['Location'].str.split(',').str[-1].str.strip())['Book-Rating'].mean()
display("Average ratings by country:\n", average_ratings_by_country)

'Average ratings by country:\n'

Location
                    3.249465
"                   2.333333
&#20013;&#22269;    9.000000
.                   0.000000
\"n/a\""            5.333333
                      ...   
yemen               9.000000
ysa                 4.666667
yugoslavia          6.500000
zambia              7.000000
zimbabwe            1.277778
Name: Book-Rating, Length: 329, dtype: float64

In [71]:
data_with_country.head()

Unnamed: 0,Unnamed: 0_x,User-ID,ISBN,Book-Rating,Unnamed: 0_y,Location,Age
0,0,276725,034545104X,0,276724,"tyler, texas, usa",
1,1,276726,0155061224,5,276725,"seattle, washington, usa",
2,2,276727,0446520802,0,276726,"h, new south wales, australia",16.0
3,3,276729,052165615X,3,276728,"rijeka, n/a, croatia",16.0
4,4,276729,0521795028,6,276728,"rijeka, n/a, croatia",16.0
