# Creating, Reading, and Writing DataFrames

#### Import pandas using the alias pd

In [1]:
import pandas as pd

#### Creating a DataFrame
- The keys in the dictionary are the column names.  
- The values in the lists inside the dictionary are the entries.  

In [2]:
pd.DataFrame({'Yes' : [50, 21], 'No' : [131, 2]})

Unnamed: 0,Yes,No
0,50,131
1,21,2


#### Creating a DataFrame with the rows (or records) defined
index=[] lists the row names

In [3]:
pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 
              'Sue': ['Pretty good.', 'Bland.']},
             index=['Product A', 'Product B'])

Unnamed: 0,Bob,Sue
Product A,I liked it.,Pretty good.
Product B,It was awful.,Bland.


#### Saving a DataFrame to disk as a csv file

In [4]:
reviews = pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 
              'Sue': ['Pretty good.', 'Bland.']},
             index=['Product A', 'Product B'])

reviews.to_csv("reviews.csv")

#### Creating a Series

In [5]:
pd.Series([1, 2, 3, 4, 5])

0    1
1    2
2    3
3    4
4    5
dtype: int64

#### Creating a Series with a name

In [6]:
pd.Series([30, 35, 40], index=['2015 Sales', '2016 Sales', '2017 Sales'], name='Product A')

2015 Sales    30
2016 Sales    35
2017 Sales    40
Name: Product A, dtype: int64

#### Reading a csv file

In [7]:
#Downloaded from: https://www.kaggle.com/datasets/sujaykapadnis/horror-movies-dataset
horror_movies = pd.read_csv("../horror_movie_playground/data/horror_movies.csv")

#### Checking the size of the DataFrame
Note: The shape attribute doesn't use parentheses because it is a property that wouldn't make sense with paremeters.

In [8]:
horror_movies.shape

(32540, 20)

### Checking the first few and last few rows of the DataFrame
Note: The default is 5, but I can change that by passing the number I want as an argument.

In [9]:
horror_movies.head()

horror_movies.tail()

Unnamed: 0,id,original_title,title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,vote_average,budget,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name
32535,1032201,Catch a Butcher,Catch a Butcher,en,In a maternity ward in nineteenth-century Indi...,,2021-11-20,/4LHrYpe6bjiT8Wvkzl3269LIUZl.jpg,0.6,0,0.0,0,0,11,Released,False,,Horror,,
32536,1032667,Shaye Saint John: The Triggers Compilation,Shaye Saint John: The Triggers Compilation,en,A bizarre mix of early Tod Hayne's and Phyllis...,"""Just keep in mind, that you'll be trying to s...",2006-10-30,/j1vWwxnTsoKjZFwJODiNHNS8GFK.jpg,0.6,0,0.0,0,0,92,Released,False,,"Fantasy, Horror",,
32537,1032890,Welcome to the Occult Forest,Welcome to the Occult Forest,en,A washed up horror film director named Kôji Ku...,"Murders, crimes, laughter, psychics, superhero...",2022-07-22,/ngFyxdliPZrG3Qra7tW3LbWksFd.jpg,0.6,0,0.0,0,0,141,Released,False,/s8Kpai084gCgQlsT3bpaEYuJmDc.jpg,"Comedy, Drama, Horror",,
32538,1033052,The Mean One,The Mean One,en,"The Mean One is a hairy, green-skinned grump i...",,2022-12-15,,0.0,0,0.0,0,0,0,Post Production,False,,Horror,,
32539,1033095,Secret Formula,Secret Formula,en,,,2020-12-06,/5GTTNqHwXfqqnBPrZAv0DVoWfjr.jpg,0.0,0,0.0,0,0,0,Released,False,,"Animation, Horror",,


# Indexing, Selecting & Assigning

### Accessing a column (a.k.a., accessing a specific series within a DataFrame)

In [10]:
horror_movies.title.head()

#also this way (good for times when the column has reserved characters like spaces):
horror_movies['title'].head()

#We can even access a specific value by calling the column and index:
horror_movies['title'][100]

'Terrifier 2'

### Accessing indexes using pandas
Both of the examples below are row-first, column second, which is the opposite of native Python.  
This makes it marginally easier to access rows and harder to access columns.

In [11]:
#index-based selection:
horror_movies.iloc[100]

#index-based selection using both row and column:
horror_movies.iloc[100, 1]

#REMINDER: LIKE ALL WORK WITH INDEXES, WE CAN SLICE OR USE A LIST FOR MULTIPLE INDEXES
horror_movies.iloc[:3, 1] #the first 3 entries for the second column
horror_movies.iloc[[0, 1, 2], 1] #another way to do the operation above

0    Orphan: First Kill
1                 Beast
2                 Smile
Name: original_title, dtype: object

In [12]:
### Label-based selection using pandas

In [13]:
horror_movies.loc[100, 'title'] #displays the 101 entry under the column 'title'

horror_movies.loc[:5, ['title', 'tagline', 'runtime']] #displays the columns 'title', 'tagline', and 'runtime' for the first 6 rows

#IMPORTANT: .loc IS INCLUSIVE. THAT'S WHY :5 GIVES 6 ENTRIES INSTEAD OF 5.

Unnamed: 0,title,tagline,runtime
0,Orphan: First Kill,There's always been something wrong with Esther.,99
1,Beast,Fight for family.,93
2,Smile,"Once you see it, it’s too late.",115
3,The Black Phone,Never talk to strangers.,103
4,Presences,,0
5,Sonríe,,0


### Manipulating the index

In [14]:
horror_movies.set_index('title') #makes the 'title' column the index

Unnamed: 0_level_0,id,original_title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,vote_average,budget,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Orphan: First Kill,760161,Orphan: First Kill,en,After escaping from an Estonian psychiatric fa...,There's always been something wrong with Esther.,2022-07-27,/pHkKbIRoCe7zIFvqan9LFSaQAde.jpg,5088.584,902,6.9,0,9572765,99,Released,False,/5GA3vV1aWWHTSDO5eno8V5zDo8r.jpg,"Horror, Thriller",760193.0,Orphan Collection
Beast,760741,Beast,en,A recently widowed man and his two teenage dau...,Fight for family.,2022-08-11,/xIGr7UHsKf0URWmyyd5qFMAq4d8.jpg,2172.338,584,7.1,0,56000000,93,Released,False,/2k9tBql5GYH328Krj66tDT9LtFZ.jpg,"Adventure, Drama, Horror",,
Smile,882598,Smile,en,"After witnessing a bizarre, traumatic incident...","Once you see it, it’s too late.",2022-09-23,/hiaeZKzwsk4y4atFhmncO5KRxeT.jpg,1863.628,114,6.8,17000000,45000000,115,Released,False,/mVNPfpydornVe4H4UCIk7WevWjf.jpg,"Horror, Mystery, Thriller",,
The Black Phone,756999,The Black Phone,en,"Finney Blake, a shy but clever 13-year-old boy...",Never talk to strangers.,2022-06-22,/lr11mCT85T1JanlgjMuhs9nMht4.jpg,1071.398,2736,7.9,18800000,161000000,103,Released,False,/AfvIjhDu9p64jKcmohS4hsPG95Q.jpg,"Horror, Thriller",,
Presences,772450,Presencias,es,A man who loses his wife and goes to seclude h...,,2022-09-07,/dgDT3uol3mdvwEg0jt1ble3l9hw.jpg,1020.995,83,7.0,0,0,0,Released,False,/ojfzhdwRemcDt1I6pao6vVLw9AA.jpg,Horror,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Catch a Butcher,1032201,Catch a Butcher,en,In a maternity ward in nineteenth-century Indi...,,2021-11-20,/4LHrYpe6bjiT8Wvkzl3269LIUZl.jpg,0.600,0,0.0,0,0,11,Released,False,,Horror,,
Shaye Saint John: The Triggers Compilation,1032667,Shaye Saint John: The Triggers Compilation,en,A bizarre mix of early Tod Hayne's and Phyllis...,"""Just keep in mind, that you'll be trying to s...",2006-10-30,/j1vWwxnTsoKjZFwJODiNHNS8GFK.jpg,0.600,0,0.0,0,0,92,Released,False,,"Fantasy, Horror",,
Welcome to the Occult Forest,1032890,Welcome to the Occult Forest,en,A washed up horror film director named Kôji Ku...,"Murders, crimes, laughter, psychics, superhero...",2022-07-22,/ngFyxdliPZrG3Qra7tW3LbWksFd.jpg,0.600,0,0.0,0,0,141,Released,False,/s8Kpai084gCgQlsT3bpaEYuJmDc.jpg,"Comedy, Drama, Horror",,
The Mean One,1033052,The Mean One,en,"The Mean One is a hairy, green-skinned grump i...",,2022-12-15,,0.000,0,0.0,0,0,0,Post Production,False,,Horror,,


### Conditional selection

In [15]:
horror_movies.loc[horror_movies.original_title != horror_movies.title] #displays movies where the original title does not match the title

#Since this seems to happen most with non-English movies, let's see which English movies changed their names.

horror_movies.loc[(horror_movies.original_title != horror_movies.title) & (horror_movies.original_language == 'en')] #In pandas, we use '&' for 'and' and '|' for 'or'

Unnamed: 0,id,original_title,title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,vote_average,budget,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name
387,133121,Biohazard: Damnation,Resident Evil: Damnation,en,U.S. federal agent Leon S. Kennedy sneaks into...,"When Terror Crawls, Evil is Unleashed",2012-09-25,/9T3u6laO3TRgJk4SFntt4UK1oXW.jpg,40.315,671,6.8,0,2325035,100,Released,False,/6pRNDb0yStxlwhavn2ZqYRVFOJ5.jpg,"Action, Animation, Horror",133352.0,Resident Evil: Biohazard Collection
445,270771,The Well,The Last Survivors,en,"In the near future, society collapses and wate...",Stand your ground.,2014-06-12,/pUTps5z6OXWCvuWooQE2py417o4.jpg,37.163,175,6.3,0,0,95,Released,False,/an4pOo1e3uARdKfvMZneCM4xgqB.jpg,"Action, Horror, Thriller",,
504,631947,La Llorona,The Legend of La Llorona,en,"While vacationing in Mexico, a couple discover...",Her spirit lives.,2022-01-07,/wtgEJKEOLnyQkWemEdFa5W8Q29L.jpg,32.519,31,5.0,0,0,98,Released,False,/9BQPM1Fa1hdXaE7hNafO5WWNv9v.jpg,"Horror, Thriller",,
610,25239,Day of the Woman,I Spit on Your Grave,en,After a young writer is brutally gang-raped an...,...An act of vengeance,1978-11-02,/nMCkOo26GtISoxJmpkTAJyMqBO8.jpg,27.131,452,6.0,650000,0,101,Released,False,/2W8iYJBSA25ENrM25GbVO4EiWAO.jpg,"Horror, Thriller",296429.0,I Spit on Your Grave (Original) Collection
838,13648,Baiohazâdo: Dijenerêshon,Resident Evil: Degeneration,en,Leon S. Kennedy and Claire Redfield must battl...,1996 - The Mansion Experiment,2008-02-13,/iyaJ1uZIogd3BWkgMVplmG2Cw6V.jpg,20.906,846,6.8,0,0,97,Released,False,/ug8cy6tthmLWF4ZcFMCAvGkUXM7.jpg,"Action, Animation, Horror, Science Fiction",133352.0,Resident Evil: Biohazard Collection
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32199,1005301,Buenas Manos,Good Hands,en,A young couple makes the unfortunate mistake o...,,2014-12-31,/1N4Fs6XxJ0tPXRcxkj0ro4Mwaem.jpg,0.600,1,8.0,0,0,5,Released,False,,"Comedy, Horror",,
32203,1005500,El Hombre Tango,The Tango Man,en,A boy's sleeping time is interrupted when the ...,,2022-07-14,/qSGbrep863yZ8wWXJws3gAHn63w.jpg,0.600,1,10.0,0,0,9,Released,False,,"Horror, Mystery, Science Fiction, Thriller",,
32228,1007334,魚魔女,Daughter of Devil Fish,en,A murdered woman is reincarnated as a fish . . .,,1984-01-01,,0.600,0,0.0,0,0,0,Released,False,,"Fantasy, Horror",,
32388,1020758,Leningrad: Kolshchik,Leningrad: The Tatoo Artist,en,Going on in reverse this music video reveals s...,,2017-02-14,,0.600,1,8.0,0,0,4,Released,False,,"Action, Comedy, Horror, Music",,


In [16]:
horror_movies['release_date'] = pd.to_datetime(horror_movies['release_date']) #changing release_date data type to datetime
horror_movies.loc[horror_movies.release_date.isin(['2022'])] #checking to see which movies were released in a certain year

  horror_movies.loc[horror_movies.release_date.isin(['2022'])] #checking to see which movies were released in a certain year


Unnamed: 0,id,original_title,title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,vote_average,budget,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name
10958,938850,Close Your Eyes,Close Your Eyes,en,A musician's quiet evening turns into a night ...,,2022-01-01,/90oMnDVGN38MPvgFtd8m2jZl9HH.jpg,1.512,0,0.0,0,0,3,Released,False,,Horror,,
14859,863848,Audition: Found Footage Film,Audition: Found Footage Film,en,"A ""Director"" held and filmed auditions in 2009...",See Terror For Real,2022-01-01,/uXGMv8XEi1PmALLG1GlyM3fSPy3.jpg,0.96,0,0.0,0,0,83,Released,False,,"Horror, Thriller",,
16776,850683,V F C,V F C,fr,,How does your body sound like when you're afraid?,2022-01-01,,0.749,0,0.0,0,0,0,Released,False,/aft6sQiWCzyNa4LtS8zHc8ON2Ic.jpg,"Drama, Horror",,
17044,853810,Fucky,Fucky,en,upcoming horror movie by Timo Rose,,2022-01-01,/3Fl9ZlUpK8FQrXlHB7aFmSehzvr.jpg,0.717,0,0.0,0,0,0,Released,False,,"Drama, Horror",,
17210,945423,ఇందువదన,Induvadana,te,Vasu is a forest officer who falls in love wit...,,2022-01-01,/eyYNrfyMQZcp8s49QE34gfVTztc.jpg,0.703,1,8.0,0,0,123,Released,False,/dYvlEDAhTSsAdU88AWqcYoTpUFC.jpg,"Horror, Mystery, Romance",,
17575,885460,The Chosen One,The Chosen One,en,After impulsively shaving off his beard and si...,It's much easier to shave your beard than to s...,2022-01-01,,0.673,0,0.0,30,0,20,Released,False,,"Drama, Horror",,
17668,1031393,Faces of Dying V,Faces of Dying V,en,More people's murders and suicides are caught ...,,2022-01-01,/xsxFefamqRITLVF3DW0o1yZPRwK.jpg,0.666,0,0.0,0,0,45,Released,False,,Horror,,
17825,928782,Speedball Nosferatu,Speedball Nosferatu,xx,"One hundred years later, Count Orlok returns f...",Vampire grime vignette,2022-01-01,/s0VcJwerCjWgAkjczKJuUcfIMRb.jpg,0.657,0,0.0,0,0,13,Released,False,/ie5B4wwt18b3uKFgdDHDV2I58fq.jpg,"Fantasy, Horror, Mystery, Thriller",,
18070,799051,Orcus,Orcus,en,A priest gathers together a team of desperate ...,,2022-01-01,/jHDAcWFw2uhhwW0qjLfOlaED4bF.jpg,0.64,0,0.0,0,0,106,Released,False,,Horror,,
31184,938095,Cash Box,Cash Box,en,"Motivated by greed, a somber man unravels a co...",,2022-01-01,/2OyEGigIPoBCferMkjFRtDFKHBp.jpg,0.6,0,0.0,0,0,2,Released,False,/pOGlgC84RaS2fkH4fy864FvCYnB.jpg,"Drama, Horror",,


In [17]:
horror_movies.loc[horror_movies.release_date.notnull()].head() #first five entries that do not have a null value for 'release_date'

horror_movies.loc[horror_movies.release_date.isnull()] #all entries with a null value for 'release_date'

Unnamed: 0,id,original_title,title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,vote_average,budget,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name


### Assigning data

In [18]:
horror_movies['status'] = 'released' #changes every entry under the column 'status' to 'released

horror_movies['index_backwards'] = range(len(horror_movies), 0, -1) #displays everything in a reverse index order
horror_movies['index_backwards']

0        32540
1        32539
2        32538
3        32537
4        32536
         ...  
32535        5
32536        4
32537        3
32538        2
32539        1
Name: index_backwards, Length: 32540, dtype: int64

# Summary Functions and Maps

### Summary functions

In [19]:
horror_movies.release_date.describe() #stats for a single column

#I can also get just one stat:
horror_movies.release_date.min()

#or unique values:
horror_movies.release_date.unique()

#unique values and how often they occur:
horror_movies.release_date.value_counts()

release_date
1989-01-01    152
1988-01-01    134
2010-01-01    113
2008-01-01    110
1987-01-01    109
             ... 
2010-08-07      1
2006-01-05      1
1978-10-26      1
1957-09-27      1
2022-02-16      1
Name: count, Length: 10999, dtype: int64

### Maps
A function that takes one set of values and "maps" them to another set of values.

In [20]:
hm_vote_mean = horror_movies.vote_average.mean() #This calculates the mean for the vote_average column and stores it in a variable called hm_vote_mean.

'''
The below code:
1. Applies mapping to the vote_average column of the horror_movies dataframe.
2. Uses lambda to create a small function to be used in the moment.
3. Defines the function. This function will take each entry ('v') in the vote_average column and subtract the mean for that column.
4. This allows us to see how far above or below the mean each movie is.
'''
horror_movies.vote_average.map(lambda v: v - hm_vote_mean)

#We can add this information to a new column called rating_vs_mean
horror_movies['rating_vs_mean'] = horror_movies.vote_average.map(lambda v: v - hm_vote_mean)

#Let's display it!
horror_movies[['title', 'rating_vs_mean']]

Unnamed: 0,title,rating_vs_mean
0,Orphan: First Kill,3.564272
1,Beast,3.764272
2,Smile,3.464272
3,The Black Phone,4.564272
4,Presences,3.664272
...,...,...
32535,Catch a Butcher,-3.335728
32536,Shaye Saint John: The Triggers Compilation,-3.335728
32537,Welcome to the Occult Forest,-3.335728
32538,The Mean One,-3.335728


### Apply method
The equivalent method to map() if we want to transform a whole DataFrame by calling a custom method for each row.

In [23]:
def remean_votes(row): #defining a function that takes a single row of data from the DataFrame as its argument
    row.vote_average - hm_vote_mean #modifies the vote_average column by subtracting the hm_vote_mean value (declared in the cell above) from it
    return row

'''
apply() will 
1. loop through each row of the DataFrame
2. pass each row to the remean_votes function
3. update the vote_average column in each row by subtracting the mean
4. return a new DataFrame where the user_votes have been adjusted
'''
horror_movies.apply(remean_votes, axis='columns') 

#Note: This does not permanently change the DataFrame.


Unnamed: 0,id,original_title,title,original_language,overview,tagline,release_date,poster_path,popularity,vote_count,...,revenue,runtime,status,adult,backdrop_path,genre_names,collection,collection_name,index_backwards,rating_vs_mean
0,760161,Orphan: First Kill,Orphan: First Kill,en,After escaping from an Estonian psychiatric fa...,There's always been something wrong with Esther.,2022-07-27,/pHkKbIRoCe7zIFvqan9LFSaQAde.jpg,5088.584,902,...,9572765,99,released,False,/5GA3vV1aWWHTSDO5eno8V5zDo8r.jpg,"Horror, Thriller",760193.0,Orphan Collection,32540,3.564272
1,760741,Beast,Beast,en,A recently widowed man and his two teenage dau...,Fight for family.,2022-08-11,/xIGr7UHsKf0URWmyyd5qFMAq4d8.jpg,2172.338,584,...,56000000,93,released,False,/2k9tBql5GYH328Krj66tDT9LtFZ.jpg,"Adventure, Drama, Horror",,,32539,3.764272
2,882598,Smile,Smile,en,"After witnessing a bizarre, traumatic incident...","Once you see it, it’s too late.",2022-09-23,/hiaeZKzwsk4y4atFhmncO5KRxeT.jpg,1863.628,114,...,45000000,115,released,False,/mVNPfpydornVe4H4UCIk7WevWjf.jpg,"Horror, Mystery, Thriller",,,32538,3.464272
3,756999,The Black Phone,The Black Phone,en,"Finney Blake, a shy but clever 13-year-old boy...",Never talk to strangers.,2022-06-22,/lr11mCT85T1JanlgjMuhs9nMht4.jpg,1071.398,2736,...,161000000,103,released,False,/AfvIjhDu9p64jKcmohS4hsPG95Q.jpg,"Horror, Thriller",,,32537,4.564272
4,772450,Presencias,Presences,es,A man who loses his wife and goes to seclude h...,,2022-09-07,/dgDT3uol3mdvwEg0jt1ble3l9hw.jpg,1020.995,83,...,0,0,released,False,/ojfzhdwRemcDt1I6pao6vVLw9AA.jpg,Horror,,,32536,3.664272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32535,1032201,Catch a Butcher,Catch a Butcher,en,In a maternity ward in nineteenth-century Indi...,,2021-11-20,/4LHrYpe6bjiT8Wvkzl3269LIUZl.jpg,0.600,0,...,0,11,released,False,,Horror,,,5,-3.335728
32536,1032667,Shaye Saint John: The Triggers Compilation,Shaye Saint John: The Triggers Compilation,en,A bizarre mix of early Tod Hayne's and Phyllis...,"""Just keep in mind, that you'll be trying to s...",2006-10-30,/j1vWwxnTsoKjZFwJODiNHNS8GFK.jpg,0.600,0,...,0,92,released,False,,"Fantasy, Horror",,,4,-3.335728
32537,1032890,Welcome to the Occult Forest,Welcome to the Occult Forest,en,A washed up horror film director named Kôji Ku...,"Murders, crimes, laughter, psychics, superhero...",2022-07-22,/ngFyxdliPZrG3Qra7tW3LbWksFd.jpg,0.600,0,...,0,141,released,False,/s8Kpai084gCgQlsT3bpaEYuJmDc.jpg,"Comedy, Drama, Horror",,,3,-3.335728
32538,1033052,The Mean One,The Mean One,en,"The Mean One is a hairy, green-skinned grump i...",,2022-12-15,,0.000,0,...,0,0,released,False,,Horror,,,2,-3.335728


### Operators
You can use operators to make faster changes than map() or apply().  
However, they aren't flexible enough for advanced things like conditional logic.

In [24]:
horror_movies.original_title + '/' + horror_movies.title

0                    Orphan: First Kill/Orphan: First Kill
1                                              Beast/Beast
2                                              Smile/Smile
3                          The Black Phone/The Black Phone
4                                     Presencias/Presences
                               ...                        
32535                      Catch a Butcher/Catch a Butcher
32536    Shaye Saint John: The Triggers Compilation/Sha...
32537    Welcome to the Occult Forest/Welcome to the Oc...
32538                            The Mean One/The Mean One
32539                        Secret Formula/Secret Formula
Length: 32540, dtype: object