In [1]:
from decouple import config
import pymongo
from pprint import pprint
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import pandas as pd


# Secrets are looked up through decouple's config() instead of being hard-coded.
MONGODB_API_KEY = config("MONGODB_API_KEY")
ANNICT_API_KEY = config("ANNICT_API_KEY")

# Annict GraphQL endpoint.
ANNICT_API_URL = "https://api.annict.com/graphql"

# HTTP headers sent with each GraphQL request: JSON content type plus
# bearer-token authorization.
headers = {}
headers["Content-type"] = "application/json"
headers["Authorization"] = f"Bearer {ANNICT_API_KEY}"

# Async HTTP transport bound to the Annict endpoint.
transport = AIOHTTPTransport(headers=headers, url=ANNICT_API_URL)

# GraphQL client on top of that transport; the schema is fetched from the
# server, and execute_timeout is set to 300 (seconds, per the gql docs — confirm).
client = Client(
    execute_timeout=300,
    fetch_schema_from_transport=True,
    transport=transport,
)




In [2]:
# Provide a GraphQL query
query = gql(
    """
query {
  searchWorks(
    orderBy: {field: WATCHERS_COUNT, direction: DESC},
  ) {
    nodes {
      annictId
      title
      titleKana
      watchersCount
      reviewsCount
      reviews {
        nodes {
          id
          ratingOverallState
          ratingMusicState
          ratingStoryState
          ratingAnimationState
          ratingCharacterState
        }
      }
    }
  }
}
"""
)

# result = client.execute(query)
result = await client.execute_async(query)

In [21]:
# Connect to MongoDB using the connection string read from the configuration.
# The handle is deliberately not called `client`: that name already refers to
# the gql GraphQL client, and rebinding it here would make any re-run of the
# Annict query cell fail because the Mongo client has no `execute_async`.
mongo_client = pymongo.MongoClient(MONGODB_API_KEY)

# Handles for the `reviews` database and its `raw_data` collection.
db = mongo_client.reviews
collection_raw_data = db.raw_data

In [3]:
# Flatten the nested GraphQL response: one row per review node, with the
# parent work's fields repeated on each of its review rows.
review_path = ["reviews", "nodes"]
work_fields = ["annictId", "title", "titleKana", "watchersCount", "reviewsCount"]
works = result["searchWorks"]["nodes"]
df = pd.json_normalize(works, record_path=review_path, meta=work_fields)

In [12]:
# Snapshot the flattened raw frame on disk so later work can reload it
# without querying the API again.
raw_pickle_path = "./data/annictRaw.pkl"
df.to_pickle(raw_pickle_path)

In [4]:
# Preview the first rows of the flattened frame (one row per review, with the
# work's fields repeated alongside each review).
df.head()

Unnamed: 0,id,ratingOverallState,ratingMusicState,ratingStoryState,ratingAnimationState,ratingCharacterState,annictId,title,titleKana,watchersCount,reviewsCount
0,UmV2aWV3LTIxNDE=,GREAT,GREAT,GREAT,GREAT,GREAT,2108,魔法少女まどか☆マギカ,まほうしょうじょまどかまぎか,10750,101
1,UmV2aWV3LTIyNTM=,GOOD,GREAT,GOOD,AVERAGE,AVERAGE,2108,魔法少女まどか☆マギカ,まほうしょうじょまどかまぎか,10750,101
2,UmV2aWV3LTI0MDI=,GREAT,GOOD,GREAT,GOOD,GREAT,2108,魔法少女まどか☆マギカ,まほうしょうじょまどかまぎか,10750,101
3,UmV2aWV3LTE1MzI2,GOOD,GREAT,GREAT,BAD,GREAT,2108,魔法少女まどか☆マギカ,まほうしょうじょまどかまぎか,10750,101
4,UmV2aWV3LTQ2Nzc3,GREAT,,,,,2108,魔法少女まどか☆マギカ,まほうしょうじょまどかまぎか,10750,101


In [9]:
# Summarise every column of the raw frame. The rating columns still hold the
# state names (BAD/AVERAGE/GOOD/GREAT) here, so the summary is
# count/unique/top/freq rather than numeric statistics.
df.describe()

Unnamed: 0,id,ratingOverallState,ratingMusicState,ratingStoryState,ratingAnimationState,ratingCharacterState,annictId,title,titleKana,watchersCount,reviewsCount
count,60816,47543,38799,39095,39111,39095,60816,60816,60816.0,60816,60816
unique,60816,4,4,4,4,4,5221,5221,4372.0,2023,124
top,UmV2aWV3LTIxNDE=,GREAT,GOOD,GOOD,GREAT,GREAT,7713,邪神ちゃんドロップキックX,,928,6
freq,1,18905,14956,14128,15675,18375,275,275,2043.0,294,1387


In [13]:
# Numeric score assigned to each Annict rating state.
RATING_SCORES = {"BAD": -1, "AVERAGE": 1, "GOOD": 2, "GREAT": 4}

# Only these columns hold rating states. Mapping the whole frame (as a blanket
# df.replace would) could also rewrite a title or id that happens to equal
# "BAD", "AVERAGE", "GOOD" or "GREAT".
RATING_COLUMNS = [
    "ratingOverallState",
    "ratingMusicState",
    "ratingStoryState",
    "ratingAnimationState",
    "ratingCharacterState",
]

# Work-level id/count columns arrive as generic object columns from
# json_normalize; convert them explicitly instead of relying on replace()'s
# implicit (deprecated) object downcasting.
NUMERIC_META_COLUMNS = ["annictId", "watchersCount", "reviewsCount"]

converted = {}
for column in RATING_COLUMNS:
    scores = df[column].map(RATING_SCORES)
    # map() turns any state missing from RATING_SCORES into NaN; surface such
    # values instead of silently treating them as "not rated".
    unknown = df.loc[df[column].notna() & scores.isna(), column].unique()
    if len(unknown):
        raise ValueError(f"unexpected rating state(s) in {column}: {list(unknown)}")
    # Missing ratings stay NaN, so every rating column is float.
    converted[column] = scores.astype("float64")
for column in NUMERIC_META_COLUMNS:
    converted[column] = pd.to_numeric(df[column])

# assign() returns a new frame with the same column order; `df` is untouched.
df2 = df.assign(**converted)

In [14]:
# Keep only rows whose work reports at least one review.
has_reviews = df2["reviewsCount"].gt(0)
df2 = df2.loc[has_reviews]

In [15]:
# Numeric summary after the rating states were mapped to scores and the
# frame was restricted to works with reviewsCount > 0.
df2.describe()

Unnamed: 0,ratingOverallState,ratingMusicState,ratingStoryState,ratingAnimationState,ratingCharacterState,annictId,watchersCount,reviewsCount
count,47543.0,38799.0,39095.0,39111.0,39095.0,60816.0,60816.0,60816.0
mean,2.52136,2.443491,2.319836,2.5467,2.713953,5295.476026,2285.357192,40.655469
std,1.331604,1.252575,1.411777,1.292015,1.316262,2298.433153,2106.670266,37.700087
min,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,2.0,1.0
25%,2.0,1.0,1.0,2.0,2.0,3737.75,729.0,13.0
50%,2.0,2.0,2.0,2.0,2.0,5624.0,1666.0,32.0
75%,4.0,4.0,4.0,4.0,4.0,7001.0,3189.0,56.0
max,4.0,4.0,4.0,4.0,4.0,10113.0,10750.0,275.0


In [25]:
def _rating_subset(frame, rating_column):
    """Return the work/review columns plus one rating column, without incomplete rows.

    The result keeps ``annictId``, ``id``, ``rating_column``, ``title``,
    ``titleKana``, ``watchersCount`` and ``reviewsCount`` (in that order) and
    drops every row that has a missing value in any of them.
    """
    wanted = [
        "annictId",
        "id",
        rating_column,
        "title",
        "titleKana",
        "watchersCount",
        "reviewsCount",
    ]
    return frame[wanted].dropna()


# One frame per rating aspect; each keeps only the reviews that rated it.
df_Overall = _rating_subset(df2, "ratingOverallState")
df_Music = _rating_subset(df2, "ratingMusicState")
df_Story = _rating_subset(df2, "ratingStoryState")
df_Animation = _rating_subset(df2, "ratingAnimationState")
df_Character = _rating_subset(df2, "ratingCharacterState")

In [26]:
# Summary of the overall-rating subset (rows with a missing value were
# already dropped when df_Overall was built).
df_Overall.describe()

Unnamed: 0,annictId,ratingOverallState,watchersCount,reviewsCount
count,47543.0,47543.0,47543.0,47543.0
mean,5370.774877,2.52136,2312.62394,38.414488
std,2374.173342,1.331604,2074.919463,32.481666
min,1.0,-1.0,2.0,1.0
25%,4147.0,2.0,779.0,13.0
50%,5788.0,2.0,1733.0,31.0
75%,7154.0,4.0,3187.0,55.0
max,10113.0,4.0,10750.0,275.0


In [30]:
# One MongoDB collection per rating aspect, named after the rating column.
# Subscript access on the database is equivalent to attribute access.
collection_Overall = db["ratingOverallState"]
collection_Music = db["ratingMusicState"]
collection_Story = db["ratingStoryState"]
collection_Animation = db["ratingAnimationState"]
collection_Character = db["ratingCharacterState"]

In [28]:
# Upload the overall-rating rows, one document per row. The call is the last
# expression so the InsertManyResult is shown as the cell output.
overall_records = df_Overall.to_dict(orient="records")
collection_Overall.insert_many(overall_records)

<pymongo.results.InsertManyResult at 0x7fdebe693f40>

In [31]:
# Upload the remaining per-aspect frames, one document per row, in the same
# order as before: music, story, animation, character.
aspect_uploads = (
    (collection_Music, df_Music),
    (collection_Story, df_Story),
    (collection_Animation, df_Animation),
    (collection_Character, df_Character),
)
for target_collection, aspect_frame in aspect_uploads:
    target_collection.insert_many(aspect_frame.to_dict("records"))
print("successfully inserted🍩")

successfully inserted🍩
