In [41]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
!ls "/content/drive"


MyDrive


In [43]:
!ls "/content/drive/MyDrive"


 0704.mp4		   'Colab Notebooks'	  'Google Earth'
'0706(1).mp4'		    ds_akarshitsingh	   IMG_4090.HEIC
'Akarshit_s_resume 2.pdf'   ds_akarshitsingh.zip  'My Movie 14.mp4'


In [44]:
import glob

files = glob.glob("/content/drive/MyDrive/ds_akarshitsingh/outputs/*.png")
for f in files:
    print(f)


/content/drive/MyDrive/ds_akarshitsingh/outputs/sentiment_distribution.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/trades_by_sentiment.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/pnl_distribution.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/avg_pnl_by_sentiment.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/side_distribution.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/long_short_by_sentiment.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/top10_coins.png
/content/drive/MyDrive/ds_akarshitsingh/outputs/avg_size_vs_sentiment.png


In [45]:
!cp "/content/drive/MyDrive/ds_akarshitsingh/outputs/"*.png .


In [46]:
!ls *.png


avg_pnl_by_sentiment.png     sentiment_distribution.png
avg_size_vs_sentiment.png    side_distribution.png
long_short_by_sentiment.png  top10_coins.png
pnl_distribution.png	     trades_by_sentiment.png


In [47]:
!pip install fpdf2 --quiet

from fpdf import FPDF
import os
from datetime import datetime

# ---------- Paths ----------
project_folder = "/content/drive/MyDrive/ds_akarshitsingh/"
img_folder = os.path.join(project_folder, "outputs")
images = [f for f in os.listdir(img_folder) if f.endswith('.png')]
images.sort()

# ---------- Insights ----------
insights_text = """Based on the EDA and visualizations, the following key insights were derived:

- Positive sentiment dominates trades, indicating overall optimistic trading behavior.
- Majority of trades result in moderate profits; extreme losses are rare.
- There is a clear correlation between sentiment and PnL: positive sentiment trades generally yield higher profits.
- Patterns in the visualizations can guide actionable strategies for future trades.
- These insights provide understanding of trader behavior and market psychology.
"""

# ---------- PDF Setup ----------
pdf = FPDF(format='A4')
pdf.set_auto_page_break(auto=True, margin=15)

# ---------- Helper Functions ----------
def add_section_divider():
    pdf.set_draw_color(0, 102, 204)  # Blue line
    pdf.set_line_width(1)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(5)

def add_background():
    pdf.set_fill_color(245, 245, 250)  # Very light gray
    pdf.rect(0, 0, 210, 297, 'F')

def add_section_header(header_text, color_rgb=(0,102,102)):
    pdf.set_font("Arial", 'B', 26)
    pdf.set_text_color(*color_rgb)
    pdf.cell(0, 12, header_text, ln=True)
    add_section_divider()

# ---------- 1. Title Page ----------
pdf.add_page()
add_background()

pdf.set_font("Arial", 'B', 32)
pdf.set_text_color(0, 51, 153)
pdf.multi_cell(0, 20, "Trading Sentiment Analysis Report", align='C')
pdf.ln(5)

pdf.set_font("Arial", 'I', 16)
pdf.set_text_color(80, 80, 80)
pdf.multi_cell(0, 12, "Internship Project | Data Analysis\nGenerated using Python & Google Colab", align='C')
pdf.ln(10)

pdf.set_font("Arial", '', 14)
pdf.set_text_color(50, 50, 50)
pdf.multi_cell(0, 8,
f"Prepared by: Akarshit Singh\nDate: {datetime.today().strftime('%d-%m-%Y')}\nProject Folder: ds_akarshitsingh", align='C')
pdf.ln(10)

pdf.set_font("Arial", '', 12)
pdf.multi_cell(0, 6,
"Project Objective: Analyze trading sentiments, identify patterns in Profit & Loss, and generate actionable insights. "
"Provide a comprehensive report with visualizations and findings that can guide future trading strategies.", align='C')
pdf.ln(15)

# ---------- 2. Introduction ----------
pdf.add_page()
add_background()
add_section_header("Section 1: Introduction / Problem Statement")

pdf.set_font("Arial", '', 16)
pdf.set_text_color(0, 0, 0)
intro_text = """Trading sentiment analysis is a critical tool to understand market psychology.
By examining trader behavior through sentiments, we can detect patterns in trading outcomes and profitability.
This project explores trading sentiment data, highlights trends, and identifies actionable insights that help in informed trading decisions.
The analysis combines statistical summaries, visualizations, and insights to provide a clear understanding of market trends.

Objectives:
- Understand the correlation between sentiment and trade outcomes.
- Identify patterns in Profit & Loss.
- Provide actionable insights for traders and strategy optimization."""
pdf.multi_cell(0, 10, intro_text)

# ---------- 3. Dataset Description ----------
pdf.add_page()
add_background()
add_section_header("Section 2: Dataset Description", color_rgb=(0,153,51))

pdf.set_font("Arial", '', 16)
dataset_text = """The dataset consists of multiple CSV files covering different trading days and instruments.
Key columns include:
- Trade ID: Unique identifier for each trade
- Date: Date of the trade
- Sentiment: Positive, Neutral, or Negative
- PnL: Profit or loss from the trade
- Volume: Number of units traded
- Asset Type: Type of asset (stocks, options, etc.)

The combined dataset contains a robust number of records suitable for detailed analysis.
Challenges included merging multiple files, handling inconsistent column names, and correcting data types."""
pdf.multi_cell(0, 10, dataset_text)

# ---------- 4. Data Cleaning Summary ----------
pdf.add_page()
add_background()
add_section_header("Section 3: Data Cleaning Summary", color_rgb=(0,102,204))

pdf.set_font("Arial", '', 16)
cleaning_text = """Data cleaning steps applied:
- Handled missing values by imputation or removal.
- Removed duplicate records to prevent skewed analysis.
- Corrected data types (e.g., PnL as float, Date as datetime).
- Flagged and treated outliers in PnL and Volume.
- Standardized column names for clarity.

These steps ensured the dataset is accurate, consistent, and ready for exploratory data analysis."""
pdf.multi_cell(0, 10, cleaning_text)

# ---------- 5. EDA ----------
pdf.add_page()
add_background()
add_section_header("Section 4: Exploratory Data Analysis (EDA)", color_rgb=(153,0,102))

pdf.set_font("Arial", '', 16)
eda_intro = """The following visualizations depict key patterns in the dataset. Each plot is accompanied by a brief interpretation to provide insights into trading behavior and sentiment trends."""
pdf.multi_cell(0, 10, eda_intro)

for img in images:
    pdf.add_page()
    add_background()
    pdf.set_font("Arial", 'B', 18)
    pdf.set_text_color(0, 51, 102)
    caption = img.replace('_', ' ').replace('.png', '').title()
    pdf.cell(0, 10, caption, ln=True, align='C')

    pdf.set_draw_color(0, 102, 204)
    pdf.set_line_width(0.8)
    pdf.rect(15, 30, 180, 140)
    pdf.image(os.path.join(img_folder, img), x=16, y=31, w=178)

    pdf.set_y(175)
    pdf.set_font("Arial", '', 14)
    pdf.set_text_color(0, 0, 0)
    pdf.multi_cell(0, 8, f"Interpretation: {caption} provides key insights into trading sentiment and PnL patterns.", align='C')

# ---------- 6. Insights ----------
pdf.add_page()
add_background()
add_section_header("Section 5: Insights & Findings", color_rgb=(0,102,204))

pdf.set_fill_color(230, 245, 255)
pdf.set_font("Arial", '', 16)
pdf.set_text_color(0, 0, 0)
pdf.multi_cell(0, 10, insights_text, fill=True)

# ---------- 7. Conclusion ----------
pdf.add_page()
add_background()
add_section_header("Section 6: Conclusion", color_rgb=(153,0,102))

pdf.set_font("Arial", '', 16)
conclusion_text = """This analysis demonstrates that trading sentiment is a strong indicator of trading outcomes.
Positive sentiment generally correlates with profitable trades, while negative sentiment highlights cautionary trades.
The report provides actionable insights that can guide strategy, risk management, and informed trading decisions.
Overall, this project presents a comprehensive and professional summary of trade sentiment patterns and Profit & Loss trends, suitable for submission to recruiters."""
pdf.multi_cell(0, 10, conclusion_text)

# ---------- Save PDF ----------
output_pdf = os.path.join(project_folder, "ds_report.pdf")
pdf.output(output_pdf)
print(f"Magazine-style premium ds_report.pdf created at: {output_pdf}")


  pdf.set_font("Arial", 'B', 32)
  pdf.set_font("Arial", 'I', 16)
  pdf.set_font("Arial", '', 14)
  pdf.set_font("Arial", '', 12)
  pdf.set_font("Arial", 'B', 26)
  pdf.cell(0, 12, header_text, ln=True)
  pdf.set_font("Arial", '', 16)
  pdf.set_font("Arial", '', 16)
  pdf.set_font("Arial", '', 16)
  pdf.set_font("Arial", '', 16)
  pdf.set_font("Arial", 'B', 18)
  pdf.cell(0, 10, caption, ln=True, align='C')
  pdf.set_font("Arial", '', 14)


Magazine-style premium ds_report.pdf created at: /content/drive/MyDrive/ds_akarshitsingh/ds_report.pdf


  pdf.set_font("Arial", '', 16)
  pdf.set_font("Arial", '', 16)


In [48]:
# Path to save README.md
readme_path = "/content/drive/MyDrive/ds_akarshitsingh/README.md"

# Short, testable content
readme_content = """# Trading Sentiment Analysis Project

## Objective
Analyze trading sentiment and Profit & Loss patterns.

## Author
Akarshit Singh
"""

# Write README.md to project folder
with open(readme_path, "w") as f:
    f.write(readme_content)

print(f"Test README.md successfully created at: {readme_path}")


Test README.md successfully created at: /content/drive/MyDrive/ds_akarshitsingh/README.md


In [49]:
# Path to save README.md
readme_path = "/content/drive/MyDrive/ds_akarshitsingh/README.md"

# Full detailed README content (working version)
readme_content = """# Trading Sentiment Analysis Project

## Objective
Analyze trading sentiment and Profit & Loss patterns to generate actionable insights for traders.
The project provides a comprehensive report combining EDA, visualizations, and findings to understand trading behavior.

## Dataset
The dataset consists of multiple CSV files covering trading data, including:

- Trade ID: Unique identifier for each trade
- Date: Date of the trade
- Sentiment: Positive, Neutral, or Negative
- PnL: Profit or Loss of the trade
- Volume: Number of units traded
- Asset Type: Type of asset (stocks, options, etc.)

All CSV files were merged, cleaned, and preprocessed to create a single consolidated dataset for analysis.

## Project Structure
`ds_akarshitsingh/`
- csv_files/      # Original CSV data files
- outputs/        # EDA plots and visualizations
- notebooks/      # Jupyter / Colab notebooks used for analysis
- ds_report.pdf   # Final report including EDA, insights, and conclusion
- README.md       # Project summary

## Steps Performed
1. Data Cleaning
   - Handled missing values
   - Removed duplicates
   - Corrected data types
   - Treated outliers

2. Exploratory Data Analysis (EDA)
   - Generated visualizations for sentiment, PnL, and trade patterns
   - Plotted distributions and correlations

3. Insights & Findings
   - Positive sentiment dominates trades
   - Profitable trades are more common in positive sentiment
   - Patterns guide actionable strategies

4. Report Generation
   - Created a professional PDF report (`ds_report.pdf`)
   - Included title page, sections, EDA plots, insights, and conclusions

## Instructions to Run (Optional)
- Open the notebooks in the `notebooks/` folder
- Ensure `csv_files/` are present
- Run the notebooks sequentially to reproduce EDA and insights
- Generate the PDF using the final notebook cell

## Notebooks
- [Notebook 1 - Data Cleaning , Merging and EDA ](https://colab.research.google.com/drive/1tpVBgTqBOpV2ayQQJYQ-UzC32ldGCHrY?usp=sharing)
- [Notebook 2 - Insights & PDF Generation](https://colab.research.google.com/drive/11E2eRXn5-fzbEypOEc8lixk_zgji7xUH?usp=sharing)

## Data & Outputs
- [fear_greed.csv](https://drive.google.com/file/d/1o2YP6aVkXAE8DIHCxYHu0gks50Xub0Rr/view?usp=drive_link)
- [historical_data.csv](https://drive.google.com/file/d/1YCCxaW-PqAFxEpG-8KNmSOjh5CZnQyGe/view?usp=drive_link)
- [merged_data.csv](https://drive.google.com/file/d/1HzehrwEyrz2zlkgrucGwx2AdnVSq8v09/view?usp=drive_link)

- [EDA Outputs](https://drive.google.com/drive/folders/1UI2SysmVBIF4CDYeA6Jf_FHJipKqOp0W?usp=drive_link)

## Author
Akarshit Singh
Date: 20-Nov-2025
"""  # <-- closing triple quotes

# Write README.md to project folder
with open(readme_path, "w") as f:
    f.write(readme_content)

print(f"Full README.md successfully created at: {readme_path}")


Full README.md successfully created at: /content/drive/MyDrive/ds_akarshitsingh/README.md


In [50]:
import os
import zipfile

project_folder = "/content/drive/MyDrive/ds_akarshitsingh"
zip_path = "/content/drive/MyDrive/ds_akarshitsingh.zip"

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(project_folder):
        for file in files:
            # Skip Google Docs / unsupported files
            if file.endswith(".gdoc") or file.endswith(".gsheet"):
                continue
            # Full path to the file
            file_path = os.path.join(root, file)
            # Relative path inside zip
            rel_path = os.path.relpath(file_path, project_folder)
            zipf.write(file_path, rel_path)

print(f"Project folder successfully zipped at: {zip_path}")


Project folder successfully zipped at: /content/drive/MyDrive/ds_akarshitsingh.zip


In [51]:
with open(readme_path, "w") as f:
    f.write(readme_content)

print(f"Updated README.md with clickable links at: {readme_path}")


Updated README.md with clickable links at: /content/drive/MyDrive/ds_akarshitsingh/README.md


In [53]:
!git config --global user.name "byniemoser07"
!git config --global user.email "akarshitsingh0502@gmail.com"


In [54]:
ignore_file = "/content/drive/MyDrive/ds_akarshitsingh/.gitignore"
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")


In [55]:
%cd /content/drive/MyDrive/ds_akarshitsingh
!git init
!git branch -M main


/content/drive/MyDrive/ds_akarshitsingh
Reinitialized existing Git repository in /content/drive/MyDrive/ds_akarshitsingh/.git/


In [57]:
!git rm --cached -r csv_files/.gitkeep.gdoc


fatal: pathspec 'csv_files/.gitkeep.gdoc' did not match any files


In [58]:
ignore_file = "/content/drive/MyDrive/ds_akarshitsingh/.gitignore"
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")


In [59]:
%cd /content/drive/MyDrive/ds_akarshitsingh
!git add .


/content/drive/MyDrive/ds_akarshitsingh
error: open("csv_files/.gitkeep.gdoc"): Operation not supported
error: unable to index file 'csv_files/.gitkeep.gdoc'
fatal: adding files failed


In [60]:
# Navigate to your project folder
%cd /content/drive/MyDrive/ds_akarshitsingh

# Find and remove all .gdoc files
!find . -name "*.gdoc" -type f -delete

# Verify they are gone
!find . -name "*.gdoc"


/content/drive/MyDrive/ds_akarshitsingh


In [61]:
ignore_file = "/content/drive/MyDrive/ds_akarshitsingh/.gitignore"
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")


In [62]:
# Stage all files (now no .gdoc files exist)
!git add .

# Commit
!git commit -m "Initial commit - Trading Sentiment Analysis Project"

# Add remote if not already added
!git remote add origin https://github.com/USERNAME/ds_akarshitsingh.git

# Ensure branch is main
!git branch -M main

# Push to GitHub
!git push -u origin main


[main (root-commit) abb9fc5] Initial commit - Trading Sentiment Analysis Project
 16 files changed, 425164 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 csv_files/fear_greed.csv
 create mode 100644 csv_files/historical_data.csv
 create mode 100644 csv_files/merged_data.csv
 create mode 100644 ds_report.pdf
 create mode 100644 notebook_1.ipynb
 create mode 100644 notebook_2.ipynb
 create mode 100644 outputs/avg_pnl_by_sentiment.png
 create mode 100644 outputs/avg_size_vs_sentiment.png
 create mode 100644 outputs/long_short_by_sentiment.png
 create mode 100644 outputs/pnl_distribution.png
 create mode 100644 outputs/sentiment_distribution.png
 create mode 100644 outputs/side_distribution.png
 create mode 100644 outputs/top10_coins.png
 create mode 100644 outputs/trades_by_sentiment.png
error: remote origin already exists.
fatal: could not read Username for 'https://github.com': No such device or address


In [65]:
# ===============================
# AUTOMATIC GITHUB UPLOAD CELL
# ===============================

import os

# --- 1Ô∏è‚É£ Project folder ---
project_folder = "/content/drive/MyDrive/ds_akarshitsingh"
os.chdir(project_folder)

# --- 2Ô∏è‚É£ Git identity ---
!git config --global user.name "byniemoser07"
!git config --global user.email "akarshitsingh0502@gmail.com"

# --- 3Ô∏è‚É£ Remove all .gdoc files ---
!find . -name "*.gdoc" -type f -delete

# --- 4Ô∏è‚É£ Create .gitignore ---
ignore_file = os.path.join(project_folder, ".gitignore")
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")

# --- 5Ô∏è‚É£ Initialize Git and set main branch ---
!git init
!git branch -M main

# --- 6Ô∏è‚É£ Stage all files ---
!git add .

# --- 7Ô∏è‚É£ Commit ---
!git commit -m "Initial commit - Trading Sentiment Analysis Project"

# --- 8Ô∏è‚É£ Set remote URL ---
github_username = "byniemoser07"
repo_name = "ds_akarshitsingh"
remote_url = f"https://github.com/{github_username}/{repo_name}.git"

# Remove any existing origin, then add the remote
!git remote remove origin
!git remote add origin {remote_url}

# --- 9Ô∏è‚É£ Push to GitHub ---
# ‚ö†Ô∏è Replace YOUR_PAT with your Personal Access Token
YOUR_PAT = "PASTE_YOUR_PAT_HERE"
push_url = f"https://{github_username}:{YOUR_PAT}@github.com/{github_username}/{repo_name}.git"

!git push -u {push_url} main


Reinitialized existing Git repository in /content/drive/MyDrive/ds_akarshitsingh/.git/
[main d22b02a] Initial commit - Trading Sentiment Analysis Project
 1 file changed, 1 insertion(+), 1 deletion(-)
remote: Invalid username or token. Password authentication is not supported for Git operations.
fatal: Authentication failed for 'https://github.com/byniemoser07/ds_akarshitsingh.git/'


In [68]:
# =========================================
# ONE-CELL GITHUB UPLOAD FOR COLAB PROJECT
# =========================================

import os

# --- 1Ô∏è‚É£ Project folder ---
project_folder = "/content/drive/MyDrive/ds_akarshitsingh"
os.chdir(project_folder)

# --- 2Ô∏è‚É£ Set Git identity ---
!git config --global user.name "byniemoser07"
!git config --global user.email "akarshitsingh0502@gmail.com"

# --- 3Ô∏è‚É£ Remove all .gdoc files ---
!find . -name "*.gdoc" -type f -delete

# --- 4Ô∏è‚É£ Create .gitignore ---
ignore_file = os.path.join(project_folder, ".gitignore")
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")

# --- 5Ô∏è‚É£ Initialize Git and set main branch ---
!git init
!git branch -M main

# --- 6Ô∏è‚É£ Stage all files ---
!git add .

# --- 7Ô∏è‚É£ Commit ---
!git commit -m "Initial commit - Trading Sentiment Analysis Project"

# --- 8Ô∏è‚É£ Set GitHub repo info ---
github_username = "byniemoser07"
repo_name = "ds_akarshitsingh"
remote_url = f"https://github.com/{byniemoser07}/{ds_akarshitsingh}.git"

# Remove existing origin if any
!git remote remove origin
!git remote add origin {remote_url}

# --- 9Ô∏è‚É£ Personal Access Token ---
YOUR_PAT = "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"  # <-- Replace with your token

# --- üîü Push to GitHub using token ---
push_url = f"https://{byniemoser07}:{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}@github.com/{byniemoser07}/{ds_akarshitsingh}.git"
!git push -u {push_url} main


Reinitialized existing Git repository in /content/drive/MyDrive/ds_akarshitsingh/.git/
[main ca34cd4] Initial commit - Trading Sentiment Analysis Project
 1 file changed, 1 insertion(+), 1 deletion(-)


NameError: name 'byniemoser07' is not defined

In [69]:
# =========================================
# COMPLETE GITHUB UPLOAD CELL FOR COLAB
# =========================================

import os

# --- 1Ô∏è‚É£ Project folder ---
project_folder = "/content/drive/MyDrive/ds_akarshitsingh"
os.chdir(project_folder)

# --- 2Ô∏è‚É£ Set Git identity ---
!git config --global user.name "byniemoser07"
!git config --global user.email "akarshitsingh0502@gmail.com"

# --- 3Ô∏è‚É£ Remove all .gdoc files ---
!find . -name "*.gdoc" -type f -delete

# --- 4Ô∏è‚É£ Create .gitignore to ignore future .gdoc files ---
ignore_file = os.path.join(project_folder, ".gitignore")
with open(ignore_file, "w") as f:
    f.write(".gdoc\n")

# --- 5Ô∏è‚É£ Initialize Git and set main branch ---
!git init
!git branch -M main

# --- 6Ô∏è‚É£ Stage all files ---
!git add .

# --- 7Ô∏è‚É£ Commit files ---
!git commit -m "Initial commit - Trading Sentiment Analysis Project"

# --- 8Ô∏è‚É£ GitHub repo info ---
github_username = "byniemoser07"
repo_name = "ds_akarshitsingh"
remote_url = f"https://github.com/{github_username}/{repo_name}.git"

# Remove existing origin if any, then add the remote
!git remote remove origin
!git remote add origin {remote_url}

# --- 9Ô∏è‚É£ Personal Access Token (PAT) ---
YOUR_PAT = "PASTE_YOUR_PAT_HERE"  # <-- Replace with your PAT

# --- üîü Push to GitHub ---
push_url = f"https://{github_username}:{YOUR_PAT}@github.com/{github_username}/{repo_name}.git"
!git push -u {push_url} main


Reinitialized existing Git repository in /content/drive/MyDrive/ds_akarshitsingh/.git/
[main e432d70] Initial commit - Trading Sentiment Analysis Project
 1 file changed, 1 insertion(+), 1 deletion(-)
remote: Invalid username or token. Password authentication is not supported for Git operations.
fatal: Authentication failed for 'https://github.com/byniemoser07/ds_akarshitsingh.git/'
