# EDA

## Listing of Tables

- books

- authors

- series

- genres

- interactions

- book_id_map

---

In [25]:
import gzip
import csv
import json
import re
import os
import sys
import numpy as np
import pandas as pd
from collections import defaultdict

dir = {your file directory here}

In [6]:
# General function to load in GZ archive
def load_data(file_name, head = 500):
    count = 0
    data = []
    with gzip.open(file_name) as fin:
        for l in fin:
            d = json.loads(l)
            count += 1
            data.append(d)
            
            # break if reaches the 100th line
            if (head is not None) and (count > head):
                break
    return data

In [9]:
# Count json.gz rows
def count_gzj(file):
    count = 0
    with gzip.open(file) as fin:
        for l in fin:
            count += 1
    return count

---

## Books

This needs some processing (see below)

In [11]:
print(count_gzj(os.path.join(dir, 'goodreads_books.json.gz')))

2360655


In [12]:
books = load_data(os.path.join(dir, 'goodreads_books.json.gz'), 0)
print(books[0].keys())

dict_keys(['isbn', 'text_reviews_count', 'series', 'country_code', 'language_code', 'popular_shelves', 'asin', 'is_ebook', 'average_rating', 'kindle_asin', 'similar_books', 'description', 'format', 'link', 'authors', 'publisher', 'num_pages', 'publication_day', 'isbn13', 'publication_month', 'edition_information', 'publication_year', 'url', 'image_url', 'book_id', 'ratings_count', 'work_id', 'title', 'title_without_series'])


In [13]:
booksDf = pd.DataFrame(books)
booksDf = booksDf.drop(['popular_shelves', 'asin', 'kindle_asin', 'is_ebook', 'link', 'edition_information', 'url'], axis=1)
booksDf

Unnamed: 0,isbn,text_reviews_count,series,country_code,language_code,average_rating,similar_books,description,format,authors,...,publication_day,isbn13,publication_month,publication_year,image_url,book_id,ratings_count,work_id,title,title_without_series
0,312853122,1,[],US,,4.0,[],,Paperback,"[{'author_id': '604031', 'role': ''}]",...,1,9780312853129,9,1984,https://images.gr-assets.com/books/1310220028m...,5333265,3,5400751,W.C. Fields: A Life on Film,W.C. Fields: A Life on Film


In [15]:
# Problem Fields
booksDf[['series','similar_books','authors']]

Unnamed: 0,series,similar_books,authors
0,[],[],"[{'author_id': '604031', 'role': ''}]"


### Modify Books with more usable fields for 'series', 'similar_books' and 'authors'

In [17]:
def load_books(file_name, head = 500):
    count = 0
    data = []
    with gzip.open(file_name) as fin:
        for l in fin:
            d = json.loads(l)
            d['series'] = ','.join([str(i) for i in d['series']])
            d['similar_books'] = ','.join([str(i) for i in d['similar_books']])
            temp = d['authors']
            d.pop('authors')
            dd = defaultdict(list)
            print(temp)
            for di in temp:
                for k, v in di.items():
                    dd[k].append(v)
            for k, v in dd.items():
                d[k] = ','.join([str(i) for i in v])
            count += 1
            data.append(d)
            
            # break if reaches the 100th line
            if (head is not None) and (count > head):
                break
    return data

In [19]:
books_mod = load_books(os.path.join(dir, 'goodreads_books.json.gz'), 0)
books_mod

[{'author_id': '604031', 'role': ''}]


[{'isbn': '0312853122',
  'text_reviews_count': '1',
  'series': '',
  'country_code': 'US',
  'language_code': '',
  'popular_shelves': [{'count': '3', 'name': 'to-read'},
   {'count': '1', 'name': 'p'},
   {'count': '1', 'name': 'collection'},
   {'count': '1', 'name': 'w-c-fields'},
   {'count': '1', 'name': 'biography'}],
  'asin': '',
  'is_ebook': 'false',
  'average_rating': '4.00',
  'kindle_asin': '',
  'similar_books': '',
  'description': '',
  'format': 'Paperback',
  'link': 'https://www.goodreads.com/book/show/5333265-w-c-fields',
  'publisher': "St. Martin's Press",
  'num_pages': '256',
  'publication_day': '1',
  'isbn13': '9780312853129',
  'publication_month': '9',
  'edition_information': '',
  'publication_year': '1984',
  'url': 'https://www.goodreads.com/book/show/5333265-w-c-fields',
  'image_url': 'https://images.gr-assets.com/books/1310220028m/5333265.jpg',
  'book_id': '5333265',
  'ratings_count': '3',
  'work_id': '5400751',
  'title': 'W.C. Fields: A Life 

In [20]:
booksModDf = pd.DataFrame(books_mod)
booksModDf = booksModDf.drop(['popular_shelves', 'asin', 'kindle_asin', 'is_ebook', 'link', 'edition_information', 'url'], axis=1)
booksModDf

Unnamed: 0,isbn,text_reviews_count,series,country_code,language_code,average_rating,similar_books,description,format,publisher,...,publication_month,publication_year,image_url,book_id,ratings_count,work_id,title,title_without_series,author_id,role
0,312853122,1,,US,,4.0,,,Paperback,St. Martin's Press,...,9,1984,https://images.gr-assets.com/books/1310220028m...,5333265,3,5400751,W.C. Fields: A Life on Film,W.C. Fields: A Life on Film,604031,


In [21]:
# Fixed Fields
booksModDf[['series', 'similar_books', 'author_id', 'role']]

Unnamed: 0,series,similar_books,author_id,role
0,,,604031,


---

### Authors

In [4]:
authors = load_data(os.path.join(dir, 'goodreads_book_authors.json.gz'), 0)
authors

[{'average_rating': '3.98',
  'author_id': '604031',
  'text_reviews_count': '7',
  'name': 'Ronald J. Fields',
  'ratings_count': '49'}]

In [22]:
print(count_gzj(os.path.join(dir, 'goodreads_book_authors.json.gz')))

829529


In [5]:
authorsDf = pd.DataFrame(authors)
authorsDf

Unnamed: 0,average_rating,author_id,text_reviews_count,name,ratings_count
0,3.98,604031,7,Ronald J. Fields,49


---

### Series


In [6]:
series = load_data(os.path.join(dir, 'goodreads_book_series.json.gz'), 0)
series

[{'numbered': 'true',
  'note': '',
  'description': '',
  'title': 'Sun Wolf and Starhawk',
  'series_works_count': '9',
  'series_id': '189911',
  'primary_work_count': '3'}]

In [24]:
print(count_gzj(os.path.join(dir, 'goodreads_book_series.json.gz')))

400390


In [7]:
seriesDf = pd.DataFrame(series)
seriesDf

Unnamed: 0,numbered,note,description,title,series_works_count,series_id,primary_work_count
0,True,,,Sun Wolf and Starhawk,9,189911,3


---

### Genres

This needs some processing

In [10]:
genres = load_data(os.path.join(dir, 'goodreads_book_genres_initial.json.gz'), 2)
#Tricky Example
genres[2]

{'book_id': '7327624',
 'genres': {'fantasy, paranormal': 31,
  'fiction': 8,
  'mystery, thriller, crime': 1,
  'poetry': 1}}

In [58]:
print(count_gzj(os.path.join(dir, 'goodreads_book_genres_initial.json.gz')))

2360655


In [14]:
#Normalize
norm = pd.json_normalize(genres[2])
norm

Unnamed: 0,book_id,"genres.fantasy, paranormal",genres.fiction,"genres.mystery, thriller, crime",genres.poetry
0,7327624,31,8,1,1


In [23]:
#Melt
melted = pd.melt(norm, id_vars='book_id', var_name='genre', value_name='count')
melted['genre'] = melted['genre'].str.slice_replace(stop=7)
melted


Unnamed: 0,book_id,genre,count
0,7327624,"fantasy, paranormal",31
1,7327624,fiction,8
2,7327624,"mystery, thriller, crime",1
3,7327624,poetry,1


---

### Interactions (Detailed/Genre Specific)

In [25]:
interactions = load_data(os.path.join(dir, 'goodreads_interactions_fantasy_paranormal.json.gz'), 0)
interactions

[{'user_id': '8842281e1d1347389f2ab93d60773d4d',
  'book_id': '19161852',
  'review_id': '4443cb6883624c3772625ef5b7b4e138',
  'is_read': False,
  'rating': 0,
  'review_text_incomplete': '',
  'date_added': 'Fri Sep 08 10:44:24 -0700 2017',
  'date_updated': 'Fri Sep 08 10:44:24 -0700 2017',
  'read_at': '',
  'started_at': ''}]

In [28]:
interactionsDf = pd.DataFrame(interactions)
interactionsDf = interactionsDf.drop(['review_text_incomplete', 'date_added', 'date_updated', 'read_at', 'started_at'], axis=1)
interactionsDf

Unnamed: 0,user_id,book_id,review_id,is_read,rating
0,8842281e1d1347389f2ab93d60773d4d,19161852,4443cb6883624c3772625ef5b7b4e138,False,0


---

### Interactions

Need to use the csv maps

In [39]:
ints = pd.read_csv(os.path.join(dir, 'goodreads_interactions.csv'), nrows=3)
ints

Unnamed: 0,user_id,book_id,is_read,rating,is_reviewed
0,0,948,1,5,0
1,0,947,1,5,1
2,0,946,1,5,0


In [None]:
ints.isnull().any()

In [27]:
with open(os.path.join(dir, 'goodreads_interactions.csv')) as f:
    count = 0
    fcsv = csv.reader(f)
    headers = next(fcsv)
    print(headers)
    for row in fcsv:
        count += 1
    print(count)

['user_id', 'book_id', 'is_read', 'rating', 'is_reviewed']
228648342


### User Map

In [12]:
userMap = pd.read_csv(os.path.join(dir, 'user_id_map.csv'), nrows=3)
userMap

Unnamed: 0,user_id_csv,user_id
0,0,8842281e1d1347389f2ab93d60773d4d
1,1,72fb0d0087d28c832f15776b0d936598
2,2,ab2923b738ea3082f5f3efcbbfacb218


In [28]:
with open(os.path.join(dir, 'user_id_map.csv')) as f:
    count = 0
    fcsv = csv.reader(f)
    headers = next(fcsv)
    print(headers)
    for row in fcsv:
        count += 1
    print(count)

['user_id_csv', 'user_id']
876145


### book_id_Map

In [34]:
bookMap = pd.read_csv(os.path.join(dir, 'book_id_map.csv'))
bookMap

Unnamed: 0,book_id_csv,book_id
0,0,34684622
1,1,34536488
2,2,34017076
3,3,71730
4,4,30422361
...,...,...
2360645,2360645,19517100
2360646,2360646,18597299
2360647,2360647,18584882
2360648,2360648,18518801


In [36]:
bookMap.isnull().any()

book_id_csv    False
book_id        False
dtype: bool

In [37]:
bookMap.dtypes

book_id_csv    int64
book_id        int64
dtype: object

In [29]:
with open(os.path.join(dir, 'book_id_map.csv')) as f:
    count = 0
    fcsv = csv.reader(f)
    headers = next(fcsv)
    print(headers)
    for row in fcsv:
        count += 1
    print(count)

['book_id_csv', 'book_id']
2360650


---

## Reviews

In [15]:
reviews = load_data(os.path.join(dir, 'goodreads_reviews_fantasy_paranormal.json.gz'), 0)
reviews

[{'user_id': '8842281e1d1347389f2ab93d60773d4d',
  'book_id': '18245960',
  'review_id': 'dfdbb7b0eb5a7e4c26d59a937e2e5feb',
  'rating': 5,
  'review_text': 'This is a special book. It started slow for about the first third, then in the middle third it started to get interesting, then the last third blew my mind. This is what I love about good science fiction - it pushes your thinking about where things can go. \n It is a 2015 Hugo winner, and translated from its original Chinese, which made it interesting in just a different way from most things I\'ve read. For instance the intermixing of Chinese revolutionary history - how they kept accusing people of being "reactionaries", etc. \n It is a book about science, and aliens. The science described in the book is impressive - its a book grounded in physics and pretty accurate as far as I could tell. Though when it got to folding protons into 8 dimensions I think he was just making stuff up - interesting to think about though. \n But what w

In [26]:
reviewsDF = pd.DataFrame(reviews)
reviewsDF = reviewsDF.drop(['date_added', 'date_updated', 'read_at', 'started_at'], axis=1)
reviewsDF

Unnamed: 0,user_id,book_id,review_id,rating,review_text,n_votes,n_comments
0,8842281e1d1347389f2ab93d60773d4d,18245960,dfdbb7b0eb5a7e4c26d59a937e2e5feb,5,This is a special book. It started slow for ab...,28,1


---

## Methodology for loading data into Database

- Can either convert JSON to CSV
- Can load JSON

Both would need a processing step

In [85]:
import dask.dataframe as dd

### Books to CSV

In [56]:
def load_jc(file_name, chunk=10):
    total = 0
    count = 0
    data = []
    with gzip.open(file_name) as fin:
        for l in fin:
            if total < 20:
                d = json.loads(l)
                count += 1
                total += 1
                data.append(d)

                # break if reaches the 100th line
                if (chunk is not None) and (count > chunk - 1):
                    df = pd.DataFrame(data)
                    count = 0
                    data.clear()
                    print(df)
            else:
                break
    return total

In [57]:
load_jc(os.path.join(dir, 'goodreads_books.json.gz'))

         isbn text_reviews_count    series country_code language_code  \
0  0312853122                  1        []           US                 
1  0743509986                  6        []           US                 
2                              7  [189911]           US           eng   
3  0743294297               3282        []           US           eng   
4  0850308712                  5        []           US                 
5  1599150603                  7        []           US                 
6  0425040887                  4        []           US                 
7  1934876569                  6  [151854]           US                 
8                              4        []           US                 
9  0922915113                 39        []           US                 

                                     popular_shelves        asin is_ebook  \
0  [{'count': '3', 'name': 'to-read'}, {'count': ...                false   
1  [{'count': '2634', 'name': 'to-read'}, 

20

In [110]:
def lw_jcBooks(file_name, chunk=10, nrows=-1):
    csv_file = os.path.join(dir, 'books.csv')
    total = 0
    count = 0
    data = []
    with gzip.open(file_name) as fin, open(csv_file, 'w', encoding='utf-8', newline='\n') as csv:
        header = True
        for l in fin:
            if nrows == -1:
                d = json.loads(l)
                count += 1
                total += 1
                data.append(d)

                if (chunk is not None) and (count > chunk - 1):
                    df = pd.DataFrame(data)
                    count = 0
                    data.clear()
                    df = df.drop(['popular_shelves', 'asin', 'kindle_asin', 'is_ebook', 'link', 'edition_information', 'url', 'description', 'image_url', 'series', 'authors', 'similar_books'], axis=1)
                    df.to_csv(csv, header=header, index=0)
                    header = False
            else:
                if total < nrows:
                    d = json.loads(l)
                    count += 1
                    total += 1
                    data.append(d)

                    if (chunk is not None) and (count > chunk - 1):
                        df = pd.DataFrame(data)
                        count = 0
                        data.clear()
                        df = df.drop(['popular_shelves', 'asin', 'kindle_asin', 'is_ebook', 'link', 'edition_information', 'url', 'description', 'image_url', 'series', 'authors', 'similar_books'], axis=1)
                        df.to_csv(csv, header=header, index=0)
                        header = False
                else:
                    break
    return total

In [111]:
lw_jcBooks(os.path.join(dir, 'goodreads_books.json.gz'), chunk=1000)

2360655

In [86]:
df_books = pd.read_csv(os.path.join(dir, 'books.csv'))

In [87]:
df_books.dtypes

isbn                     object
text_reviews_count      float64
country_code             object
language_code            object
average_rating          float64
format                   object
publisher                object
num_pages               float64
publication_day         float64
isbn13                   object
publication_month       float64
publication_year        float64
book_id                   int64
ratings_count           float64
work_id                 float64
title                    object
title_without_series     object
dtype: object

In [88]:
df_books.isna().any()

isbn                     True
text_reviews_count       True
country_code             True
language_code            True
average_rating           True
format                   True
publisher                True
num_pages                True
publication_day          True
isbn13                   True
publication_month        True
publication_year         True
book_id                 False
ratings_count            True
work_id                  True
title                    True
title_without_series     True
dtype: bool

In [89]:
df_books.isna().sum()

isbn                     983098
text_reviews_count          524
country_code                490
language_code           1059845
average_rating              524
format                   646578
publisher                654176
num_pages                763912
publication_day         1024157
isbn13                   780044
publication_month        882714
publication_year         599462
book_id                       0
ratings_count               524
work_id                     524
title                        11
title_without_series         11
dtype: int64

In [90]:
df_books.isna().sum() * 100 / len(df_books)

isbn                    41.656695
text_reviews_count       0.022203
country_code             0.020763
language_code           44.908686
average_rating           0.022203
format                  27.397373
publisher               27.719322
num_pages               32.369153
publication_day         43.396483
isbn13                  33.052712
publication_month       37.403136
publication_year        25.400932
book_id                  0.000000
ratings_count            0.022203
work_id                  0.022203
title                    0.000466
title_without_series     0.000466
dtype: float64

In [199]:
def lw_jcBooksInfo(file_name, chunk=10, nrows=-1):
    csv_file = os.path.join(dir, 'book_info.csv')
    total = 0
    count = 0
    data = []
    with gzip.open(file_name) as fin, open(csv_file, 'w', encoding='utf-8', newline='\n') as csvf:
        header = True
        for l in fin:
            if nrows == -1:
                d = json.loads(l)
                keysToExtract = ['book_id', 'work_id', 'description', 'link', 'url', 'image_url']
                dm = {key: d[key] for key in keysToExtract}
                count += 1
                total += 1
                data.append(dm)

                if (chunk is not None) and (count > chunk - 1):
                    df = pd.DataFrame(data)
                    df['description'] = df['description'].str.replace('\n', '\\n')
                    count = 0
                    data.clear()
                    df.to_csv(csvf, header=header, index=0)
                    header = False
            else:
                if total < nrows:
                    d = json.loads(l)
                    keysToExtract = ['book_id', 'work_id', 'description', 'link', 'url', 'image_url']
                    dm = {key: d[key] for key in keysToExtract}
                    count += 1
                    total += 1
                    data.append(dm)

                    if (chunk is not None) and (count > chunk - 1):
                        df = pd.DataFrame(data)
                        df['description'] = df['description'].str.replace('\n', '\\n')
                        count = 0
                        data.clear()
                        df.to_csv(csvf, header=header, index=0)
                        header = False
                else:
                    break
    return total

In [201]:
lw_jcBooksInfo(os.path.join(dir, 'goodreads_books.json.gz'), 1000)

2360655

In [178]:
def show():
    with gzip.open(os.path.join(dir, 'goodreads_books.json.gz')) as fin:
            for l in fin:
                d = json.loads(l)
                break
    return d

In [198]:
with open(os.path.join(dir, 'book_info.csv')) as f, open(os.path.join(dir, 'test.txt'), 'w', encoding='utf-8', newline='\n') as tex:
    fc = csv.reader(f)
    for i in range(5):
        tex.write((next(f)))

In [None]:
dd_bi = dd.read_csv(os.path.join(dir, 'book_info.csv'), dtype={'book_id': str, 'work_id': str, 'description': str, 'link': str, 'url': str, 'image_url': str}, engine='python', escapechar='~')
dd_bi

In [None]:
dd_bi.isna().sum().compute()

---

In [76]:
ddf_interactions = dd.read_csv(os.path.join(dir, 'goodreads_interactions.csv'))
ddf_interactions

Unnamed: 0_level_0,user_id,book_id,is_read,rating,is_reviewed
npartitions=68,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,int64,int64,int64,int64,int64
,...,...,...,...,...
...,...,...,...,...,...
,...,...,...,...,...
,...,...,...,...,...


In [78]:
ddf_interactions.dtypes

user_id        int64
book_id        int64
is_read        int64
rating         int64
is_reviewed    int64
dtype: object

In [80]:
ddf_interactions.describe().compute()

Unnamed: 0,user_id,book_id,is_read,rating,is_reviewed
count,228648300.0,228648300.0,228648300.0,228648300.0,228648300.0
mean,250063.6,201575.6,0.490409,1.80134,0.07093491
std,171271.3,344867.7,0.499908,2.072672,0.2567161
min,0.0,0.0,0.0,0.0,0.0
25%,112551.0,13541.0,0.0,0.0,0.0
50%,233209.0,59840.0,1.0,3.0,0.0
75%,356406.0,264593.5,1.0,5.0,0.0
max,876144.0,2360649.0,1.0,5.0,1.0


In [75]:
ddf_interactions.isna().any().compute()

user_id        False
book_id        False
is_read        False
rating         False
is_reviewed    False
dtype: bool

---

### Authors to CSV

In [91]:
def lw_jcAuthors(file_name, chunk=10, nrows=-1):
    csv_file = os.path.join(dir, 'authors.csv')
    total = 0
    count = 0
    data = []
    with gzip.open(file_name) as fin, open(csv_file, 'w', encoding='utf-8', newline='\n') as csv:
        header = True
        for l in fin:
            if nrows == -1:
                d = json.loads(l)
                count += 1
                total += 1
                data.append(d)

                if (chunk is not None) and (count > chunk - 1):
                    df = pd.DataFrame(data)
                    count = 0
                    data.clear()
                    df.to_csv(csv, header=header, index=0)
                    header = False
            else:
                if total < nrows:
                    d = json.loads(l)
                    count += 1
                    total += 1
                    data.append(d)

                    if (chunk is not None) and (count > chunk - 1):
                        df = pd.DataFrame(data)
                        count = 0
                        data.clear()
                        df.to_csv(csv, header=header, index=0)
                        header = False
                else:
                    break
    return total

In [93]:
lw_jcAuthors(os.path.join(dir, 'goodreads_book_authors.json.gz'), chunk = 1000)

829529

In [94]:
df_authors = pd.read_csv(os.path.join(dir, 'authors.csv'))

In [95]:
df_authors.dtypes

average_rating        float64
author_id               int64
text_reviews_count      int64
name                   object
ratings_count           int64
dtype: object

In [96]:
df_authors.isna().any()

average_rating        False
author_id             False
text_reviews_count    False
name                   True
ratings_count         False
dtype: bool

In [98]:
df_authors.isna().sum()

average_rating        0
author_id             0
text_reviews_count    0
name                  5
ratings_count         0
dtype: int64

In [99]:
df_authors.describe()

Unnamed: 0,average_rating,author_id,text_reviews_count,ratings_count
count,829000.0,829000.0,829000.0,829000.0
mean,3.844858,5750882.0,106.929556,1596.299
std,0.602844,5129887.0,1770.788657,44810.97
min,0.0,3.0,0.0,0.0
25%,3.58,932297.0,2.0,8.0
50%,3.9,4951601.0,6.0,31.0
75%,4.17,7837560.0,20.0,132.0
max,5.0,17343370.0,448570.0,18532720.0


---

### Series to CSV

In [100]:
def lw_jcSeries(file_name, chunk=10, nrows=-1):
    csv_file = os.path.join(dir, 'series.csv')
    total = 0
    count = 0
    data = []
    with gzip.open(file_name) as fin, open(csv_file, 'w', encoding='utf-8', newline='\n') as csv:
        header = True
        for l in fin:
            if nrows == -1:
                d = json.loads(l)
                count += 1
                total += 1
                data.append(d)

                if (chunk is not None) and (count > chunk - 1):
                    df = pd.DataFrame(data)
                    count = 0
                    data.clear()
                    df.to_csv(csv, header=header, index=0)
                    header = False
            else:
                if total < nrows:
                    d = json.loads(l)
                    count += 1
                    total += 1
                    data.append(d)

                    if (chunk is not None) and (count > chunk - 1):
                        df = pd.DataFrame(data)
                        count = 0
                        data.clear()
                        df.to_csv(csv, header=header, index=0)
                        header = False
                else:
                    break
    return total

In [101]:
lw_jcSeries(os.path.join(dir, 'goodreads_book_series.json.gz'))

400390

In [102]:
df_series = pd.read_csv(os.path.join(dir, 'series.csv'))

In [103]:
df_series.dtypes

numbered                bool
note                  object
description           object
title                 object
series_works_count     int64
series_id              int64
primary_work_count     int64
dtype: object

In [104]:
df_series.isna().sum()

numbered                   0
note                  375111
description           249371
title                      6
series_works_count         0
series_id                  0
primary_work_count         0
dtype: int64

In [105]:
df_series.describe()

Unnamed: 0,series_works_count,series_id,primary_work_count
count,400390.0,400390.0,400390.0
mean,21.588149,623045.0,19.771653
std,65.1031,294445.3,63.501377
min,-14.0,144392.0,0.0
25%,3.0,363737.2,3.0
50%,6.0,615837.0,5.0
75%,14.0,877564.8,12.0
max,893.0,1143859.0,893.0
