In [1]:
!pip install --upgrade google-cloud-bigquery pandas matplotlib seaborn --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m63.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.0 which is incompatible.
dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.0 which is incompatible.
cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.0 which is incompatible.[0m[31m
[0m

In [3]:
# ✅ Step 2: Authenticate using your Google Account (easy!)
# NOTE: `google.colab` comes from the google-colab package of the Colab runtime, so
# this cell is meant for Colab only. When running in VS Code, use the
# service-account flow described in the VS Code section later in this notebook.
from google.colab import auth
auth.authenticate_user()

## 🆔 How to Find Your Project ID
Go to: https://console.cloud.google.com/

Create/select a project

Copy the Project ID (e.g., data-kaustav-789) — you will paste it in place of "your-project-id" in the Step 3 cell below.

If you’ve never created a GCP project before, go to:
👉 https://console.cloud.google.com/projectcreate

In [4]:
# ✅ Step 3: Import libraries
from google.cloud import bigquery
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# set_theme() is seaborn's documented entry point; sns.set() is only a legacy alias for it.
sns.set_theme(style='whitegrid')

# Replace with your actual GCP project ID
PROJECT_ID = "your-project-id"

# Fail fast with an actionable message instead of letting the first query fail
# later with a less obvious API error about an unknown project.
if PROJECT_ID == "your-project-id":
    raise ValueError(
        "Set PROJECT_ID to your own GCP project ID before creating the BigQuery client."
    )

# Every query submitted through this client runs under PROJECT_ID.
client = bigquery.Client(project=PROJECT_ID)

In [5]:
# ✅ Step 4: Helper function to run queries
def run_query(query):
    """Run a SQL string through the notebook-level ``client`` and return a DataFrame.

    Args:
        query: SQL text passed unchanged to ``client.query``.

    Returns:
        Whatever ``to_dataframe()`` yields for the submitted query job.
    """
    job = client.query(query)
    return job.to_dataframe()

In [None]:
# Count titles per `type`. The ORDER BY pins the row order: GROUP BY alone gives no
# ordering guarantee, so the bar order in the chart could change between runs.
query1 = """
WITH netflix AS (
  SELECT * FROM `bigquery-public-data.netflix_titles.netflix_titles`
)
SELECT type, COUNT(*) AS count
FROM netflix
GROUP BY type
ORDER BY type
"""
# df1 holds one row per type, with the columns `type` and `count` selected above.
df1 = run_query(query1)

# Plot it
# Use an explicit Axes so the labels are set on this figure only.
fig, ax = plt.subplots()

# seaborn >= 0.13 deprecates `palette` without `hue` (FutureWarning); mapping hue to the
# x variable keeps one colour per bar, and legend=False drops the redundant legend.
sns.barplot(
    data=df1,
    x='type',
    y='count',
    hue='type',
    palette='Set2',
    legend=False,
    ax=ax,
)
ax.set_title("🎬 Count of Movies vs TV Shows on Netflix")
ax.set_xlabel("Type")
ax.set_ylabel("Count")
plt.show()

## VS Code Setup

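# ✅ The `google.colab` sign-in used above only works inside Colab, so in VS Code you must authenticate with a service account:
# 🔐 Step 1: Download a Service Account Key JSON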
Go to Google Cloud Console → IAM & Admin → Service Accounts

Select your project.

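Create a service account (if not already done) and grant it the BigQuery Job User role (`roles/bigquery.jobUser`) on the project so it is allowed to run queries.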

Go to Keys tab → Add Key → Create new key → JSON

Save the .json file (e.g., kaustav-gcp-key.json) in your VS Code project folder, and add it to .gitignore — the key is a credential and must never be committed or shared.

# 🛠️ Step 2: Install Required Libraries

In [None]:
!pip install google-cloud-bigquery pandas matplotlib seaborn


# 🔧 Step 3: Authenticate with Service Account Key

In [None]:
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# set_theme() is seaborn's documented entry point; sns.set() is only a legacy alias for it.
sns.set_theme(style='whitegrid')

# ✅ Path to your key file
# The path is relative to the notebook's working directory. The key is a credential:
# keep it out of version control.
key_path = "kaustav-gcp-key.json"

# ✅ Create credentials and client
credentials = service_account.Credentials.from_service_account_file(key_path)
# The project ID comes from the loaded credentials, so it is not repeated here.
client = bigquery.Client(credentials=credentials, project=credentials.project_id)


# ✅ Step 4: Define Helper and Run Query

In [None]:
def run_query(query):
    """Run a SQL string through the notebook-level ``client`` and return a DataFrame.

    Args:
        query: SQL text passed unchanged to ``client.query``.

    Returns:
        Whatever ``to_dataframe()`` yields for the submitted query job.
    """
    job = client.query(query)
    return job.to_dataframe()

# Count titles per `type`. The ORDER BY pins the row order: GROUP BY alone gives no
# ordering guarantee, so the bar order in the chart could change between runs.
query1 = """
WITH netflix AS (
  SELECT * FROM `bigquery-public-data.netflix_titles.netflix_titles`
)
SELECT type, COUNT(*) AS count
FROM netflix
GROUP BY type
ORDER BY type
"""

# df1 holds one row per type, with the columns `type` and `count` selected above.
df1 = run_query(query1)


# 📊 Step 5: Plot in VS Code

In [None]:
# Use an explicit Axes so the labels are set on this figure only.
fig, ax = plt.subplots()

# seaborn >= 0.13 deprecates `palette` without `hue` (FutureWarning); mapping hue to the
# x variable keeps one colour per bar, and legend=False drops the redundant legend.
sns.barplot(
    data=df1,
    x='type',
    y='count',
    hue='type',
    palette='Set2',
    legend=False,
    ax=ax,
)
ax.set_title("🎬 Count of Movies vs TV Shows on Netflix")
ax.set_xlabel("Type")
ax.set_ylabel("Count")
plt.show()


# ✅ Summary: VS Code Setup Steps
✅ Install required Python packages

✅ Download & save service account .json key

✅ Use google.oauth2.service_account for authentication

✅ Connect to BigQuery and run SQL queries just like in Colab