# Makes cleaning_df

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore") 


pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 200)


### Import scraped dataframes

In [2]:
mens_df = pd.read_csv("item_review/walmart_reviews.csv")
womens_df = pd.read_csv("item_review/walmart_womens_reviews.csv")
boys_df = pd.read_csv("item_review/walmart_boys_clothing_reviews.csv")
girls_df = pd.read_csv("item_review/walmart_girls_clothing_reviews.csv")

In [3]:
womens_df.drop(columns=["Unnamed: 0"], inplace = True)
womens_df.drop_duplicates(subset="item_name", inplace=True)
womens_df["section"] = "women's clothing"

mens_df.drop(columns=["Unnamed: 0"], inplace = True)
mens_df.drop_duplicates(subset="item_name", inplace=True)
mens_df["section"] = "men's clothing"

boys_df.drop(columns=["Unnamed: 0"], inplace = True)
boys_df.drop_duplicates(subset="item_name", inplace=True)
boys_df["section"] = "boy's clothing"

girls_df.drop(columns=["Unnamed: 0"], inplace = True)
girls_df.drop_duplicates(subset="item_name", inplace=True)
girls_df["section"]="girl's clothing"

In [4]:
print(mens_df.shape)
print(womens_df.shape)
print(boys_df.shape)
print(girls_df.shape)

(308, 10)
(471, 10)
(154, 10)
(292, 10)


In [5]:
frames = [mens_df, womens_df, boys_df, girls_df]
cleaning_df = pd.concat(frames, ignore_index=True)

In [6]:
cleaning_df.shape

(1225, 10)

In [7]:
cleaning_df.to_csv("cleaning_df.csv")

In [8]:
cleaning_df.head()
# things to check - if num_rating >3:
# If overall rating = NaN then 1. If recommened = NaN then 0. But check the above condition first. This would mean that not enough people purchased the item. 

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,rev_date,review_text,rating_stars,section
0,Wrangler Men's Unlined Shirt Jacket,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.0,7 ratings,80%,['Texas winter ready!'],"November 26, 2020",['I bought this last year and am going to buy ...,['Average Rating: ( 5.0 ) out of 5 stars 7 rat...,men's clothing
1,Wrangler Men's Relaxed Fit Jeans,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.5,5840 ratings,90%,"['Wrangler is the best.', 'My Favorite Jeans',...","August 5, 2020","['I like wrangler, the pants is exactly for my...",['Average Rating: ( 5.0 ) out of 5 stars 5840 ...,men's clothing
2,Wrangler Big Men's Relaxed Fit Jean,$15.00 $ 15 . 00 $15.00 $ 15 . 00,4.3,3257 ratings,88%,"['Jeans That Fit Just Right', 'Great jeans wou...","September 19, 2020","[""If you like Big Men's style wearing jeans th...",['Average Rating: ( 5.0 ) out of 5 stars 3257 ...,men's clothing
3,Wrangler Men's 5 Star Regular Fit Jean with Flex,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.5,259 ratings,91%,"['Committed buyer', 'More Comfortable than Swe...","January 6, 2020",['My husband loves these jeans and goes throug...,['Average Rating: ( 5.0 ) out of 5 stars 259 r...,men's clothing
4,Wrangler Men's and Big Men's 5 Star Relaxed Fi...,$15.00 $ 15 . 00 $15.00 $ 15 . 00,4.6,1490 ratings,91%,['Saved money and faster delivery that Amazon'...,"November 1, 2020",['Fit well Great price same jeans I was buying...,['Average Rating: ( 5.0 ) out of 5 stars 1490 ...,men's clothing


In [9]:
cleaning_df[cleaning_df.index==500]

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,rev_date,review_text,rating_stars,section
500,Just My Size Plus-Size Women's Long-Sleeve Sco...,$8.24 $ 8 . 24 $8.24 $ 8 . 24,4.1,456 ratings,73%,['Get them while you can or you will regret it...,"April 8, 2020","[""I saw the items on line and loved the colors...",['Average Rating: ( 5.0 ) out of 5 stars 456 r...,women's clothing


In [10]:
cleaning_df.review_text[500]

'["I saw the items on line and loved the colors the price was very reasonable. I read the description and ordered my size 1x. When l received them because l brought 5, l tried one of them on and was very pleased and excited. They are true to fit, the colors l ordered  was just as l pictured l really love the pink one because l love bright colors, very comfortable, good arm length and hugs your body just right. There is nothing negative l have to say regarding the tops. My only regret is that l didn\'t get 2 of the pink .", "I\'m a  big women and these tees are great. I buy them in every color. I LOVE JMS clothing a great deal. They always fit perfectly, wash beautifully, and are a big bang for the buck.", \'Beautiful vibrant colors.  Warmer than I thought.  If you are short like me , can use as a night shirt. If you are average you can wear for  a pajama top.  Yes they are that comfortable.  Great price.  Lots of give in the fabric. Thanks just my size and walmart.\', \'Very beautiful 

In [11]:
cleaning_df.rev_date[500]

'April 8, 2020'

In [12]:
cleaning_df.num_rating[500]

'456 ratings'

In [13]:
cleaning_df.price[500]

'$8.24 $ 8 . 24 $8.24 $ 8 . 24'

In [14]:
cleaning_df.isnull().sum()

item_name           2
price               2
overall_rating    108
num_rating          0
recommend         108
rev_title         121
rev_date            2
review_text        76
rating_stars        2
section             0
dtype: int64

In [15]:
cleaning_df.drop(columns = "rev_date", inplace = True)

In [16]:
cleaning_df.dropna(subset=["item_name"], axis = 0, how="any", inplace=True)

In [17]:
cleaning_df.isnull().sum()

item_name           0
price               0
overall_rating    106
num_rating          0
recommend         106
rev_title         119
review_text        74
rating_stars        0
section             0
dtype: int64

In [18]:
cleaning_df.review_text.isnull()

0       False
1       False
2       False
3       False
4       False
        ...  
1220    False
1221    False
1222    False
1223    False
1224    False
Name: review_text, Length: 1223, dtype: bool

In [19]:
cleaning_df[cleaning_df.index == 1101]

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,review_text,rating_stars,section
1101,Awkward Styles Matching Christmas Pajamas Set ...,$19.95 $ 19 . 95 $19.95 $ 19 . 95,,1 ratings,,,,['Average Rating: ( 5.0 ) out of 5 stars 1 rat...,girl's clothing


In [20]:
cleaning_df["overall_rating"] = cleaning_df["overall_rating"].fillna(0)

In [21]:
cleaning_df[cleaning_df["overall_rating"]==0]

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,review_text,rating_stars,section
5,AND1 Men's Pro Platinum Cushion Performance No...,$5.00 $ 5 . 00 $5.00 $ 5 . 00,0.0,1 ratings,,,,['Average Rating: ( 1.0 ) out of 5 stars 1 rat...,men's clothing
25,"Champion Men's Super No Show Multi Logo, 6 Pack",$11.40 $ 11 . 40 $11.40 $ 11 . 40,0.0,7 ratings,,['Sock order'],"[""Product as expected and well priced. Can't c...",['Average Rating: ( 5.0 ) out of 5 stars 7 rat...,men's clothing
27,Dr. Seuss Men's The Grinch 2 Pack Boxer Briefs,$12.99 $ 12 . 99 $12.99 $ 12 . 99,0.0,1 ratings,,,,['Average Rating: ( 2.0 ) out of 5 stars 1 rat...,men's clothing
35,Champion Men's Powerblend Applique Crewneck Sw...,$27.50 $ 27 . 50 $27.50 $ 27 . 50,0.0,3 ratings,,['Great product!'],['Really a sharp looking quality product. Wil...,['Average Rating: ( 5.0 ) out of 5 stars 3 rat...,men's clothing
40,Star Wars The Child Standing Men's and Big Men...,$12.44 $ 12 . 44 $12.44 $ 12 . 44,0.0,2 ratings,,,['My wife envies this shirt'],['Average Rating: ( 5.0 ) out of 5 stars 2 rat...,men's clothing
41,Rokka&Rolla Men's Lightweight Pullover Anorak ...,$23.99 $ 23 . 99 $23.99 $ 23 . 99,0.0,14 ratings,,['XL was too small for me still deserves 5 sta...,"[""It was kind of small. Can't wear it with a s...",['Average Rating: ( 5.0 ) out of 5 stars 14 ra...,men's clothing
47,Reebok Men's Jolt Short Sleeve Top,$12.88 $ 12 . 88 $12.88 $ 12 . 88,0.0,4 ratings,,,,['Average Rating: ( 5.0 ) out of 5 stars 4 rat...,men's clothing
57,ORORO Men's Heated Jacket Kit With Detachable ...,$135.99 $ 135 . 99 $135.99 $ 135 . 99,0.0,250 ratings,,"['Excellent deal for the price!', 'I really li...","[""Good materials,warm I'm happy"", 'I can’t eve...",['Average Rating: ( 5.0 ) out of 5 stars 250 r...,men's clothing
59,Eddie Bauer Men's CirrusLite Down Vest,$51.00 $ 51 . 00 $51.00 $ 51 . 00,0.0,5 ratings,,"['Great vest!', 'Nice and comfortable vest!', ...",['This vest is perfect as a base layer or by i...,['Average Rating: ( 5.0 ) out of 5 stars 5 rat...,men's clothing
60,"Champion Men's Everyday Comfort Boxers, 3 Pack",$19.20 $ 19 . 20 $19.20 $ 19 . 20,0.0,1 ratings,,,,['Average Rating: ( 5.0 ) out of 5 stars 1 rat...,men's clothing


In [29]:
cleaning_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1223 entries, 0 to 1224
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   item_name       1223 non-null   object 
 1   price           1223 non-null   object 
 2   overall_rating  1223 non-null   float64
 3   num_rating      1223 non-null   object 
 4   recommend       1117 non-null   object 
 5   rev_title       1104 non-null   object 
 6   review_text     1149 non-null   object 
 7   rating_stars    1223 non-null   object 
 8   section         1223 non-null   object 
dtypes: float64(1), object(8)
memory usage: 95.5+ KB


In [23]:
cleaning_df.isnull().sum()

item_name           0
price               0
overall_rating      0
num_rating          0
recommend         106
rev_title         119
review_text        74
rating_stars        0
section             0
dtype: int64

In [28]:
cleaning_df[cleaning_df.recommend < "50%"]

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,review_text,rating_stars,section
7,Fruit of the Loom Men's Core Waffle Thermal Top,$5.00 $ 5 . 00 $5.00 $ 5 . 00,3.3,9 ratings,100%,"['Love them', 'Love them - definitely a classi...",['These are very cozy! Body pair for me and my...,['Average Rating: ( 5.0 ) out of 5 stars 9 rat...,men's clothing
9,Free Assembly Men's Two-Pocket Flannel Shirt,$18.00 $ 18 . 00 $18.00 $ 18 . 00,4.9,22 ratings,100%,"['Washes well. Soft. Good value.', 'Super su...",['My husband also likes this shirt. I washed a...,['Average Rating: ( 5.0 ) out of 5 stars 22 ra...,men's clothing
11,Eddie Bauer Men's CirrusLite Down Jacket,$49.98 $ 49 . 98 $49.98 $ 49 . 98,4.4,16 ratings,44%,"[""Don't Be Fooled By The Low Price"", 'Great li...","[""This jacket has REALLY impressed me. It's S...",['Average Rating: ( 5.0 ) out of 5 stars 16 ra...,men's clothing
15,Free Assembly Men's Everyday Chambray Shirt,$17.00 $ 17 . 00 $17.00 $ 17 . 00,4.2,10 ratings,100%,"['Great Shirt', 'Great!', 'Highly Recommend', ...","['Looks good, feels good, true to size', 'Perf...",['Average Rating: ( 5.0 ) out of 5 stars 10 ra...,men's clothing
16,Lazer Men's Ripstop Cargo Pants,$10.00 $ 10 . 00 $10.00 $ 10 . 00,4.0,5 ratings,100%,['Sizing is off'],"[""Sizing is off. I am a 32 waist in jeans and ...",['Average Rating: ( 2.0 ) out of 5 stars 5 rat...,men's clothing
...,...,...,...,...,...,...,...,...,...
1218,Forever Me Girls Floral Crochet Cozy With Frin...,$16.50 $ 16 . 50 $16.50 $ 16 . 50,4.8,10 ratings,100%,"['Too cute', 'such a cute outfit', 'Very cute!...",['This is an adorable outfit and I cant wait t...,['Average Rating: ( 5.0 ) out of 5 stars 10 ra...,girl's clothing
1219,Nickelodeon Jojo Siwa Star Swirl Scuff Slipper...,$24.99 $ 24 . 99 $24.99 $ 24 . 99,4.8,4 ratings,100%,['what we wanted'],['My great granddaughter loves theses house sh...,['Average Rating: ( 5.0 ) out of 5 stars 4 rat...,girl's clothing
1221,Toddler Kids Baby Girls Outfit Floral Top Blou...,$15.99 $ 15 . 99 $15.99 $ 15 . 99,2.5,6 ratings,40%,['go up a size'],['make sure to go up a size. shirt runs small ...,['Average Rating: ( 3.0 ) out of 5 stars 6 rat...,girl's clothing
1222,Child of Mine by Carter's Baby & Toddler Girls...,$12.94 $ 12 . 94 $12.94 $ 12 . 94,4.8,6 ratings,100%,"['I Wish It Came in Adult Sizes', 'Very Cute',...",['This is the cutest outfit! The top is a litt...,['Average Rating: ( 5.0 ) out of 5 stars 6 rat...,girl's clothing


In [30]:
cleaning_df[cleaning_df.overall_rating < 4]

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,review_text,rating_stars,section
5,AND1 Men's Pro Platinum Cushion Performance No...,$5.00 $ 5 . 00 $5.00 $ 5 . 00,0.0,1 ratings,,,,['Average Rating: ( 1.0 ) out of 5 stars 1 rat...,men's clothing
7,Fruit of the Loom Men's Core Waffle Thermal Top,$5.00 $ 5 . 00 $5.00 $ 5 . 00,3.3,9 ratings,100%,"['Love them', 'Love them - definitely a classi...",['These are very cozy! Body pair for me and my...,['Average Rating: ( 5.0 ) out of 5 stars 9 rat...,men's clothing
25,"Champion Men's Super No Show Multi Logo, 6 Pack",$11.40 $ 11 . 40 $11.40 $ 11 . 40,0.0,7 ratings,,['Sock order'],"[""Product as expected and well priced. Can't c...",['Average Rating: ( 5.0 ) out of 5 stars 7 rat...,men's clothing
27,Dr. Seuss Men's The Grinch 2 Pack Boxer Briefs,$12.99 $ 12 . 99 $12.99 $ 12 . 99,0.0,1 ratings,,,,['Average Rating: ( 2.0 ) out of 5 stars 1 rat...,men's clothing
35,Champion Men's Powerblend Applique Crewneck Sw...,$27.50 $ 27 . 50 $27.50 $ 27 . 50,0.0,3 ratings,,['Great product!'],['Really a sharp looking quality product. Wil...,['Average Rating: ( 5.0 ) out of 5 stars 3 rat...,men's clothing
...,...,...,...,...,...,...,...,...,...
1205,Marika Girls Graphic Tie-Front Long Sleeve T-S...,$10.00 $ 10 . 00 $10.00 $ 10 . 00,0.0,1 ratings,,,,['Average Rating: ( 5.0 ) out of 5 stars 1 rat...,girl's clothing
1206,Boutique Kids Baby Girls Leopard Clothes Top T...,$16.15 $ 16 . 15 $16.15 $ 16 . 15,3.6,5 ratings,66%,['Good'],['It was very cute has a tear in it. I will be...,['Average Rating: ( 4.0 ) out of 5 stars 5 rat...,girl's clothing
1216,Btween Girls 7-14 Flip Sequin Pocket Graphic P...,$10.00 $ 10 . 00 $10.00 $ 10 . 00,3.7,3 ratings,50%,"['Thin but cute', 'Super cute but gets holes w...",['This hoodie is a little on the thinner side ...,['Average Rating: ( 5.0 ) out of 5 stars 3 rat...,girl's clothing
1217,MarinaVida autumn and winter fashion women's c...,$14.77 $ 14 . 77 $14.77 $ 14 . 77,2.4,12 ratings,16%,"['Not as described', 'FEELING SO SAD FOR MY DA...",['Thankfully my 19 yr old daughter is petite! ...,['Average Rating: ( 2.0 ) out of 5 stars 12 ra...,girl's clothing


In [32]:
cleaning_df.review_text.isna().sum()

74

In [36]:
cleaning_df.dropna(subset=["review_text"])

Unnamed: 0,item_name,price,overall_rating,num_rating,recommend,rev_title,review_text,rating_stars,section
0,Wrangler Men's Unlined Shirt Jacket,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.0,7 ratings,80%,['Texas winter ready!'],['I bought this last year and am going to buy ...,['Average Rating: ( 5.0 ) out of 5 stars 7 rat...,men's clothing
1,Wrangler Men's Relaxed Fit Jeans,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.5,5840 ratings,90%,"['Wrangler is the best.', 'My Favorite Jeans',...","['I like wrangler, the pants is exactly for my...",['Average Rating: ( 5.0 ) out of 5 stars 5840 ...,men's clothing
2,Wrangler Big Men's Relaxed Fit Jean,$15.00 $ 15 . 00 $15.00 $ 15 . 00,4.3,3257 ratings,88%,"['Jeans That Fit Just Right', 'Great jeans wou...","[""If you like Big Men's style wearing jeans th...",['Average Rating: ( 5.0 ) out of 5 stars 3257 ...,men's clothing
3,Wrangler Men's 5 Star Regular Fit Jean with Flex,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.5,259 ratings,91%,"['Committed buyer', 'More Comfortable than Swe...",['My husband loves these jeans and goes throug...,['Average Rating: ( 5.0 ) out of 5 stars 259 r...,men's clothing
4,Wrangler Men's and Big Men's 5 Star Relaxed Fi...,$15.00 $ 15 . 00 $15.00 $ 15 . 00,4.6,1490 ratings,91%,['Saved money and faster delivery that Amazon'...,['Fit well Great price same jeans I was buying...,['Average Rating: ( 5.0 ) out of 5 stars 1490 ...,men's clothing
...,...,...,...,...,...,...,...,...,...
1220,"dELiA*s Girls Pajama Set, 2-Piece, Sizes 7-16",$17.60 $ 17 . 60 $17.60 $ 17 . 60,4.4,8 ratings,75%,,"[""This was very small. The 14-16 was way too l...",['Average Rating: ( 3.0 ) out of 5 stars 8 rat...,girl's clothing
1221,Toddler Kids Baby Girls Outfit Floral Top Blou...,$15.99 $ 15 . 99 $15.99 $ 15 . 99,2.5,6 ratings,40%,['go up a size'],['make sure to go up a size. shirt runs small ...,['Average Rating: ( 3.0 ) out of 5 stars 6 rat...,girl's clothing
1222,Child of Mine by Carter's Baby & Toddler Girls...,$12.94 $ 12 . 94 $12.94 $ 12 . 94,4.8,6 ratings,100%,"['I Wish It Came in Adult Sizes', 'Very Cute',...",['This is the cutest outfit! The top is a litt...,['Average Rating: ( 5.0 ) out of 5 stars 6 rat...,girl's clothing
1223,Wonder Nation Girls Plush Full-Zip Jacket with...,$12.00 $ 12 . 00 $12.00 $ 12 . 00,4.4,28 ratings,90%,"['Nice plush jacket', 'Fluffy hoodie', 'So sof...",['A nice plush jacket for a young girl. It is ...,['Average Rating: ( 5.0 ) out of 5 stars 28 ra...,girl's clothing
