In [1]:
import pandas as pd

df = pd.read_csv('story_data.csv')

In [2]:
df.shape

(18248, 12)

We have 18,248 stories (samples) and 12 features

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18248 entries, 0 to 18247
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   url             18248 non-null  object
 1   story_id        18248 non-null  object
 2   soup            18248 non-null  object
 3   title           18248 non-null  object
 4   author          18246 non-null  object
 5   contest_num     18248 non-null  object
 6   won_contest     236 non-null    object
 7   categories      18248 non-null  object
 8   num_likes       18248 non-null  int64 
 9   num_comments    18248 non-null  int64 
 10  story_html      18248 non-null  object
 11  date_published  18248 non-null  object
dtypes: int64(2), object(10)
memory usage: 1.7+ MB


'won_contest' only has a value if the story was a winner or shortlisted

In [4]:
df.head()

Unnamed: 0,url,story_id,soup,title,author,contest_num,won_contest,categories,num_likes,num_comments,story_html,date_published
0,https://blog.reedsy.com/short-story/n1pl7n/,n1pl7n,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,11:59,Krystal Pepper,"['Contest', '#137']",,"['Crime', 'Fiction', 'Thriller']",17,3,"<article class=""font-alt submission-content sp...","March 11, 2022 17:27"
1,https://blog.reedsy.com/short-story/yv1ahb/,yv1ahb,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Want Ads,Nina Wishnat,"['Contest', '#30']",,"['Contemporary', 'Fiction']",1,0,"<article class=""font-alt submission-content sp...","February 28, 2020 15:27"
2,https://blog.reedsy.com/short-story/kv1cr7/,kv1cr7,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,To be wrong because of sincerity...,Lis Lovén,"['Contest', '#102']",,"['Black', 'Contemporary', 'Fiction']",12,0,"<article class=""font-alt submission-content sp...","July 10, 2021 14:21"
3,https://blog.reedsy.com/short-story/v2nqtq/,v2nqtq,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,"The Shiva, 1955",Tammy Kl,"['Contest', '#100']",,"['American', 'Coming', 'of', 'Age', 'Fiction']",6,0,"<article class=""font-alt submission-content sp...","July 01, 2021 18:13"
4,https://blog.reedsy.com/short-story/nptt18/,nptt18,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,The Monster of Greentree,Best Christopher,"['Contest', '#102']",,"['Coming', 'of', 'Age', 'Adventure', 'Fiction']",6,0,"<article class=""font-alt submission-content sp...","July 17, 2021 01:23"


In [5]:
df.describe()

Unnamed: 0,num_likes,num_comments
count,18248.0,18248.0
mean,14.1387,4.295758
std,18.742977,17.676338
min,0.0,0.0
25%,7.0,0.0
50%,10.0,1.0
75%,14.0,3.0
max,503.0,629.0


two numeric features: 'num_likes' is target feature for regression problem

features to extract:
tokenize words
word related features


extracting some word-related features:

In [6]:
from bs4 import BeautifulSoup
from nltk.tokenize import wordpunct_tokenize, sent_tokenize, word_tokenize
import numpy as np

import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Batra\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
def separate_words_and_punct(arr_tokenized_text):
    words = []
    punctuation = []
    for i, value in enumerate(arr_tokenized_text):
        if len(value) == 1 and not value.isalpha():
            punctuation.append(i)
        else:
            words.append(i)
    return arr_tokenized_text[words], arr_tokenized_text[punctuation]

In [8]:
def extract_word_feats(story_text):
    story_word_toks = word_tokenize(story_text)
    words, punctuation = separate_words_and_punct(np.array(story_word_toks, dtype='object'))
    num_words = len(words)
    unique_words = set(words)
    num_unique_words = len(unique_words)
    unique_words_percent = num_unique_words / num_words
    return story_word_toks, words, punctuation, num_words, unique_words, num_unique_words, unique_words_percent

In [9]:
def extract_sentence_feats(story_text):
    story_sent = sent_tokenize(story_text)
    num_sent = len(story_sent)
    words_per_sent = np.array([(len(word_tokenize(t)), word_tokenize(t)) for t in story_sent], dtype='object')
    sent_lengths = words_per_sent[:, 0].astype(int)
    sent_words = words_per_sent[:, 1]
    sent_avg_length = sent_lengths.mean()
    return story_sent, num_sent, sent_lengths, sent_avg_length, sent_words

In [10]:
def tokenize_story(row):
    # extracting story from html
    html = row['story_html']
    soup = BeautifulSoup(html, "html.parser")
    story = soup.get_text()
    # tokenizing by sentence
    story_sent, num_sent, sent_lengths, sent_avg_length, sent_words = extract_sentence_feats(story)
    # tokenizing by word
    story_word_toks, words, punctuation, num_words, unique_words, num_unique_words, unique_words_percent = extract_word_feats(story)
        
    feat_dict = {'num_sent': num_sent, 'story_sent': story_sent, 'sent_lengths': sent_lengths, 
                 'sent_avg_length': sent_avg_length, 'sent_words': sent_words, 'story_word_toks': story_word_toks, 
                 'words': words, 'punctuation': punctuation, 'num_words': num_words, 'unique_words': unique_words, 
                 'num_unique_words': num_unique_words, 'unique_words_percent': unique_words_percent}
    return pd.DataFrame.from_dict(feat_dict, orient='index').transpose()

In [11]:
new_feats = ('num_sent', 'words', 'num_words', 'unique_words', 'num_unique_words', 'unique_words_percent','punctuation')
results = df.apply(tokenize_story, axis=1)

In [12]:
feats_df = pd.DataFrame()
for sub in results:
    feats_df = pd.concat([feats_df, sub])
feats_df = feats_df.reset_index()
feats_df.drop('index', axis=1, inplace=True)
feats_df.head()

Unnamed: 0,num_sent,story_sent,sent_lengths,sent_avg_length,sent_words,story_word_toks,words,punctuation,num_words,unique_words,num_unique_words,unique_words_percent
0,125,[\n11:59 \n\tI shoot straight out of bed to a...,"[17, 10, 16, 12, 14, 21, 25, 23, 12, 11, 36, 5...",24.08,"[[11:59, I, shoot, straight, out, of, bed, to,...","[11:59, I, shoot, straight, out, of, bed, to, ...","[11:59, I, shoot, straight, out, of, bed, to, ...","[., ., ., ., ., ,, ,, ., ,, ,, ., ,, ., ,, ., ...",2645,"{details, ago, assault, drawn, to, holding, pi...",774,0.292628
1,87,"[\nMia’s room was pretty much bare., She lay o...","[9, 19, 4, 23, 23, 9, 5, 6, 13, 12, 19, 8, 13,...",19.54023,"[[Mia, ’, s, room, was, pretty, much, bare, .]...","[Mia, ’, s, room, was, pretty, much, bare, ., ...","[Mia, s, room, was, pretty, much, bare, She, l...","[’, ., ,, ., ., ,, ., ., ., ., ., “, ?, ’, ,, ...",1453,"{misfortune, until, to, turquoise, waited, fas...",619,0.426015
2,150,"[\n \nLin had to figure out things, because ob...","[26, 27, 10, 23, 24, 27, 21, 2, 16, 18, 20, 26...",15.006667,"[[Lin, had, to, figure, out, things, ,, becaus...","[Lin, had, to, figure, out, things, ,, because...","[Lin, had, to, figure, out, things, because, o...","[,, ,, ., ,, ,, ., ., ,, ., ,, ,, ., ,, ,, ., ...",2008,"{escape, beware, null, to, solve, fall, method...",676,0.336653
3,153,"[\n The Shiva, 1955\n \n\tFor the rest of her ...","[24, 12, 36, 12, 23, 19, 49, 20, 28, 27, 26, 5...",21.346405,"[[The, Shiva, ,, 1955, For, the, rest, of, her...","[The, Shiva, ,, 1955, For, the, rest, of, her,...","[The, Shiva, 1955, For, the, rest, of, her, li...","[,, ,, ., ,, ., –, ,, ,, ,, ’, ., (, !, ), ,, ...",2735,"{Maybe, reassemble, well, drawn, details, lips...",994,0.363437
4,186,[\nThe wind whipped through my hair as I sped ...,"[20, 17, 15, 17, 14, 26, 15, 27, 15, 21, 13, 7...",17.951613,"[[The, wind, whipped, through, my, hair, as, I...","[The, wind, whipped, through, my, hair, as, I,...","[The, wind, whipped, through, my, hair, as, I,...","[’, ,, ., ., ,, ,, ., ,, ,, ., ,, ., ,, ,, ’, ...",2773,"{well, Knight, popped, ago, until, to, equally...",863,0.311215


In [13]:
feats_df.shape, df.shape

((18248, 12), (18248, 12))

12 new features extracted

In [14]:
new_df = pd.concat([df, feats_df], axis=1)
new_df

Unnamed: 0,url,story_id,soup,title,author,contest_num,won_contest,categories,num_likes,num_comments,...,sent_lengths,sent_avg_length,sent_words,story_word_toks,words,punctuation,num_words,unique_words,num_unique_words,unique_words_percent
0,https://blog.reedsy.com/short-story/n1pl7n/,n1pl7n,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,11:59,Krystal Pepper,"['Contest', '#137']",,"['Crime', 'Fiction', 'Thriller']",17,3,...,"[17, 10, 16, 12, 14, 21, 25, 23, 12, 11, 36, 5...",24.08,"[[11:59, I, shoot, straight, out, of, bed, to,...","[11:59, I, shoot, straight, out, of, bed, to, ...","[11:59, I, shoot, straight, out, of, bed, to, ...","[., ., ., ., ., ,, ,, ., ,, ,, ., ,, ., ,, ., ...",2645,"{details, ago, assault, drawn, to, holding, pi...",774,0.292628
1,https://blog.reedsy.com/short-story/yv1ahb/,yv1ahb,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Want Ads,Nina Wishnat,"['Contest', '#30']",,"['Contemporary', 'Fiction']",1,0,...,"[9, 19, 4, 23, 23, 9, 5, 6, 13, 12, 19, 8, 13,...",19.54023,"[[Mia, ’, s, room, was, pretty, much, bare, .]...","[Mia, ’, s, room, was, pretty, much, bare, ., ...","[Mia, s, room, was, pretty, much, bare, She, l...","[’, ., ,, ., ., ,, ., ., ., ., ., “, ?, ’, ,, ...",1453,"{misfortune, until, to, turquoise, waited, fas...",619,0.426015
2,https://blog.reedsy.com/short-story/kv1cr7/,kv1cr7,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,To be wrong because of sincerity...,Lis Lovén,"['Contest', '#102']",,"['Black', 'Contemporary', 'Fiction']",12,0,...,"[26, 27, 10, 23, 24, 27, 21, 2, 16, 18, 20, 26...",15.006667,"[[Lin, had, to, figure, out, things, ,, becaus...","[Lin, had, to, figure, out, things, ,, because...","[Lin, had, to, figure, out, things, because, o...","[,, ,, ., ,, ,, ., ., ,, ., ,, ,, ., ,, ,, ., ...",2008,"{escape, beware, null, to, solve, fall, method...",676,0.336653
3,https://blog.reedsy.com/short-story/v2nqtq/,v2nqtq,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,"The Shiva, 1955",Tammy Kl,"['Contest', '#100']",,"['American', 'Coming', 'of', 'Age', 'Fiction']",6,0,...,"[24, 12, 36, 12, 23, 19, 49, 20, 28, 27, 26, 5...",21.346405,"[[The, Shiva, ,, 1955, For, the, rest, of, her...","[The, Shiva, ,, 1955, For, the, rest, of, her,...","[The, Shiva, 1955, For, the, rest, of, her, li...","[,, ,, ., ,, ., –, ,, ,, ,, ’, ., (, !, ), ,, ...",2735,"{Maybe, reassemble, well, drawn, details, lips...",994,0.363437
4,https://blog.reedsy.com/short-story/nptt18/,nptt18,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,The Monster of Greentree,Best Christopher,"['Contest', '#102']",,"['Coming', 'of', 'Age', 'Adventure', 'Fiction']",6,0,...,"[20, 17, 15, 17, 14, 26, 15, 27, 15, 21, 13, 7...",17.951613,"[[The, wind, whipped, through, my, hair, as, I...","[The, wind, whipped, through, my, hair, as, I,...","[The, wind, whipped, through, my, hair, as, I,...","[’, ,, ., ., ,, ,, ., ,, ,, ., ,, ., ,, ,, ’, ...",2773,"{well, Knight, popped, ago, until, to, equally...",863,0.311215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18243,https://blog.reedsy.com/short-story/8u5rv6/,8u5rv6,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,The Silver Swing,Serianna French,"['Contest', '#96']",,"['Fantasy', 'Fiction']",9,0,...,"[21, 16, 11, 12, 27, 64, 8, 5, 7, 8, 5, 10, 19...",14.737805,"[[Up, two, flights, ,, down, the, hall, ,, tur...","[Up, two, flights, ,, down, the, hall, ,, turn...","[Up, two, flights, down, the, hall, turn, righ...","[,, ,, ,, 6, 4, ,, 5, ,, 6, ., ., ,, ., ., ,, ...",2113,"{pool, interior, until, to, picture, Pearly, w...",747,0.353526
18244,https://blog.reedsy.com/short-story/y5ahp0/,y5ahp0,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Do it for the views,Vameerah Darren,"['Contest', '#65']",,"['Horror', 'Fiction']",14,2,...,"[7, 35, 77, 3, 73, 28, 13, 23, 19, 13, 23, 17,...",23.373494,"[[“, What, ’, s, up, guys, !], [Welcome, back,...","[“, What, ’, s, up, guys, !, Welcome, back, to...","[What, s, up, guys, Welcome, back, to, dareswi...","[“, ’, !, ‘, ’, !, ”, ., ,, “, ’, ”, ,, “, ’, ...",1573,"{ago, lips, Smiling, to, fast, grown, greeted,...",637,0.404959
18245,https://blog.reedsy.com/short-story/w5a4il/,w5a4il,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Lessons for a Medium in Training,Carrie Fitzgerald,"['Contest', '#105']",['Contest #105 shortlist ⭐️'],"['Fiction', 'Speculative']",29,4,...,"[14, 6, 20, 16, 19, 11, 6, 26, 19, 5, 7, 16, 2...",12.351648,"[[Just, because, someone, is, dead, doesn, ’, ...","[Just, because, someone, is, dead, doesn, ’, t...","[Just, because, someone, is, dead, doesn, t, m...","[’, ., ,, ., ,, ’, ., ,, ., ’, ., ’, ., ., ,, ...",1831,"{Maybe, well, psychic, awkward, to, holding, f...",602,0.328782
18246,https://blog.reedsy.com/short-story/bivfbi/,bivfbi,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Jake &amp; Charlie,Amélie Brown,"['Contest', '#83']",,"['Fiction', 'Romance', 'Teens', '&', 'Young', ...",4,0,...,"[20, 29, 23, 30, 47, 11, 7, 26, 18, 8, 43, 23,...",21.26087,"[[I, could, feel, the, cool, breeze, on, my, s...","[I, could, feel, the, cool, breeze, on, my, sk...","[I, could, feel, the, cool, breeze, on, my, sk...","[’, ., ,, ,, ,, ., “, ”, ,, ,, ., “, ’, ”, “, ...",1157,"{please., popped, bubs…, until, to, fall, clot...",530,0.458081


extracting and encoding categories

In [15]:
def find_num_cats(row):
    return len(row['categories'].split())

In [16]:
new_df['num_cats'] = new_df.apply(find_num_cats, axis=1)

In [17]:
new_df.head()

Unnamed: 0,url,story_id,soup,title,author,contest_num,won_contest,categories,num_likes,num_comments,...,sent_avg_length,sent_words,story_word_toks,words,punctuation,num_words,unique_words,num_unique_words,unique_words_percent,num_cats
0,https://blog.reedsy.com/short-story/n1pl7n/,n1pl7n,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,11:59,Krystal Pepper,"['Contest', '#137']",,"['Crime', 'Fiction', 'Thriller']",17,3,...,24.08,"[[11:59, I, shoot, straight, out, of, bed, to,...","[11:59, I, shoot, straight, out, of, bed, to, ...","[11:59, I, shoot, straight, out, of, bed, to, ...","[., ., ., ., ., ,, ,, ., ,, ,, ., ,, ., ,, ., ...",2645,"{details, ago, assault, drawn, to, holding, pi...",774,0.292628,3
1,https://blog.reedsy.com/short-story/yv1ahb/,yv1ahb,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,Want Ads,Nina Wishnat,"['Contest', '#30']",,"['Contemporary', 'Fiction']",1,0,...,19.54023,"[[Mia, ’, s, room, was, pretty, much, bare, .]...","[Mia, ’, s, room, was, pretty, much, bare, ., ...","[Mia, s, room, was, pretty, much, bare, She, l...","[’, ., ,, ., ., ,, ., ., ., ., ., “, ?, ’, ,, ...",1453,"{misfortune, until, to, turquoise, waited, fas...",619,0.426015,2
2,https://blog.reedsy.com/short-story/kv1cr7/,kv1cr7,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,To be wrong because of sincerity...,Lis Lovén,"['Contest', '#102']",,"['Black', 'Contemporary', 'Fiction']",12,0,...,15.006667,"[[Lin, had, to, figure, out, things, ,, becaus...","[Lin, had, to, figure, out, things, ,, because...","[Lin, had, to, figure, out, things, because, o...","[,, ,, ., ,, ,, ., ., ,, ., ,, ,, ., ,, ,, ., ...",2008,"{escape, beware, null, to, solve, fall, method...",676,0.336653,3
3,https://blog.reedsy.com/short-story/v2nqtq/,v2nqtq,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,"The Shiva, 1955",Tammy Kl,"['Contest', '#100']",,"['American', 'Coming', 'of', 'Age', 'Fiction']",6,0,...,21.346405,"[[The, Shiva, ,, 1955, For, the, rest, of, her...","[The, Shiva, ,, 1955, For, the, rest, of, her,...","[The, Shiva, 1955, For, the, rest, of, her, li...","[,, ,, ., ,, ., –, ,, ,, ,, ’, ., (, !, ), ,, ...",2735,"{Maybe, reassemble, well, drawn, details, lips...",994,0.363437,5
4,https://blog.reedsy.com/short-story/nptt18/,nptt18,<!DOCTYPE html>\n\n<html>\n<head>\n<meta chars...,The Monster of Greentree,Best Christopher,"['Contest', '#102']",,"['Coming', 'of', 'Age', 'Adventure', 'Fiction']",6,0,...,17.951613,"[[The, wind, whipped, through, my, hair, as, I...","[The, wind, whipped, through, my, hair, as, I,...","[The, wind, whipped, through, my, hair, as, I,...","[’, ,, ., ., ,, ,, ., ,, ,, ., ,, ., ,, ,, ’, ...",2773,"{well, Knight, popped, ago, until, to, equally...",863,0.311215,5


In [18]:
new_df.shape

(18248, 25)

In [19]:
import re
cats = set()
for sample, num in new_df[['categories', 'num_cats']].values:
    samp_cats = re.findall('[A-Z][a-z]+', sample)
    cats.update(samp_cats)
print(f'we have {len(cats)} categories')

we have 49 categories


In [20]:
for cat in cats:
    new_df[f'cat_{cat}'] = (new_df['categories']).astype('category').str.contains(cat).astype(int)
new_df.shape

(18248, 74)

In [21]:
new_df['cat_Fiction'].sum() == len(new_df)

True

In [22]:
new_df.drop('categories', axis=1, inplace=True)
new_df.shape

(18248, 73)

encoding date as datetime

In [23]:
new_df['date_published'] = pd.to_datetime(new_df['date_published'])
new_df['date_published']

0       2022-03-11 17:27:00
1       2020-02-28 15:27:00
2       2021-07-10 14:21:00
3       2021-07-01 18:13:00
4       2021-07-17 01:23:00
                ...        
18243   2021-06-04 06:50:00
18244   2020-10-28 18:39:00
18245   2021-08-05 16:07:00
18246   2021-03-05 16:14:00
18247   2020-11-17 03:13:00
Name: date_published, Length: 18248, dtype: datetime64[ns]

In [24]:
new_df.shape

(18248, 73)

In [25]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18248 entries, 0 to 18247
Data columns (total 73 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   url                   18248 non-null  object        
 1   story_id              18248 non-null  object        
 2   soup                  18248 non-null  object        
 3   title                 18248 non-null  object        
 4   author                18246 non-null  object        
 5   contest_num           18248 non-null  object        
 6   won_contest           236 non-null    object        
 7   num_likes             18248 non-null  int64         
 8   num_comments          18248 non-null  int64         
 9   story_html            18248 non-null  object        
 10  date_published        18248 non-null  datetime64[ns]
 11  num_sent              18248 non-null  object        
 12  story_sent            18248 non-null  object        
 13  sent_lengths    

In [26]:
new_df.describe()

Unnamed: 0,num_likes,num_comments,num_cats,cat_Adventure,cat_Desi,cat_Bedtime,cat_East,cat_American,cat_Nonfiction,cat_Creative,...,cat_Romance,cat_Adult,cat_Transgender,cat_Age,cat_Contemporary,cat_Fantasy,cat_African,cat_Horror,cat_Christmas,cat_Mystery
count,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,...,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0,18248.0
mean,14.1387,4.295758,2.95046,0.06735,0.005206,0.009864,0.002959,0.045101,0.006247,0.006247,...,0.092339,0.050964,0.001425,0.044991,0.123959,0.143577,0.005425,0.046526,0.010083,0.051019
std,18.742977,17.676338,1.175209,0.250634,0.071967,0.09883,0.05432,0.207531,0.078794,0.078794,...,0.289512,0.219931,0.037721,0.207291,0.329544,0.35067,0.073458,0.210626,0.099911,0.220043
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,7.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,10.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,14.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,503.0,629.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [27]:
# new_df.to_csv('story_data_new_feats.csv', index=False)

In [28]:
# other = pd.read_csv('story_data_new_feats.csv')

In [29]:
# other.shape

(18248, 73)