# In [None]:  (Jupyter cell prompt left over from the notebook export)
import os
import csv
import requests
import pandas as pd
import tweepy  # ✅ Tweepy replaces snscrape
import praw
import kaggle as kg
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from pytrends.request import TrendReq

## ✅ Environment Setup

# Kaggle Credentials (Replace with your Kaggle API Key)
# These assignments unconditionally overwrite any KAGGLE_USERNAME / KAGGLE_KEY
# already present in the process environment, so the placeholders must be
# replaced with real values before running.
# NOTE(review): `import kaggle` may already attempt authentication at import
# time, i.e. before these variables are set — confirm for the installed version.
os.environ['KAGGLE_USERNAME'] = '{KAGGLE_USERNAME}'
os.environ['KAGGLE_KEY'] = '{KAGGLE_KEY}'
kg.api.authenticate()

# VADER Sentiment Analyzer
# The lexicon is downloaded first; `sia` is the module-level analyzer that
# fetch_tweets() uses to score tweet text.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Reddit API Setup (Replace with your credentials)
# `reddit` is the module-level client that scrape_reddit() reads from.
reddit = praw.Reddit(
    client_id="YOUR_CLIENT_ID",
    client_secret="YOUR_CLIENT_SECRET",
    user_agent="YOUR_USER_AGENT"
)

## ✅ Dataset 1: Bitcoin Historical Data
# Download and unzip the dataset under "on.zip", then load the CSV found there.
data_1 = "mczielinski/bitcoin-historical-data"
kg.api.dataset_download_files(dataset=data_1, path="on.zip", unzip=True)
df_1 = pd.read_csv('on.zip/btcusd_1-min_data.csv')

## ✅ Dataset 2: Bitcoin/Ethereum Prices
# Same flow as above, using "on_1.zip" as the download location.
data_2 = "kapturovalexander/bitcoin-and-ethereum-prices-from-start-to-2023"
kg.api.dataset_download_files(dataset=data_2, path="on_1.zip", unzip=True)
df_2 = pd.read_csv('on_1.zip/bitcoin_prices.csv')  # Ensure correct filename

## ✅ Dataset 3: Bitcoin Fear & Greed Index
def get_fng_data(timeout=10):
    """Fetch Fear & Greed Index records from the alternative.me API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        The value stored under the ``"data"`` key of the JSON response, or
        ``None`` when the request fails or the status code is not 200.
    """
    url = "https://api.alternative.me/fng/"
    # NOTE(review): limit=0 is presumably the API's "all available records"
    # setting — confirm against the alternative.me documentation.
    params = {"limit": 0, "format": "json", "date_format": "world"}
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures follow the same "report and return None" path as
        # a non-200 status, so callers only have one failure shape to handle.
        print("Error:", exc)
        return None
    if response.status_code != 200:
        print("Error:", response.status_code)
        return None
    return response.json()["data"]

def convert_csv(data, filename="fear_greed_index.csv"):
    """Write a sequence of record dicts to *filename* as CSV.

    The header is the union of the keys of every record, in first-seen order,
    so records may carry different key sets; a key a record lacks is written
    as an empty cell. Using only the first record's keys would make
    ``csv.DictWriter`` raise ``ValueError`` on any later record with an
    extra key.

    Args:
        data: Sequence of dicts to write. ``None`` or an empty sequence
            prints "No data" and leaves the filesystem untouched.
        filename: Destination path; an existing file is overwritten.
    """
    if not data:
        print("No data")
        return
    # dict.fromkeys de-duplicates while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print(f"✅ Data saved to {filename}")

# Fetch the index records and persist them to CSV. When the fetch returns
# None, convert_csv() prints "No data" and writes nothing.
# NOTE: despite its name, df_3 holds the raw value returned by
# get_fng_data(), not a pandas DataFrame.
df_3 = get_fng_data()
convert_csv(df_3)

## ✅ Dataset 4: Twitter Scraping (Using Tweepy)

# Twitter API Credentials (Replace with yours)
# Placeholder value: substitute a real bearer token before running.
TWITTER_BEARER_TOKEN = "YOUR_TWITTER_BEARER_TOKEN"

# Build the Tweepy client using bearer-token authentication.
# `client` is the module-level handle that fetch_tweets() searches through.
client = tweepy.Client(bearer_token=TWITTER_BEARER_TOKEN)

def fetch_tweets(query="(Bitcoin OR BTC) -is:retweet", max_tweets=1000):
    """Search recent tweets, score them with VADER, and save them to CSV.

    Results are written to ``bitcoin_sentiment.csv`` in the working directory
    with one row per tweet: creation time, author id, text, and the four VADER
    scores (compound, positive, neutral, negative).

    Args:
        query: Search query in Twitter API v2 syntax. The default excludes
            retweets with ``-is:retweet``; the v1.1-style ``-filter:retweets``
            operator is not part of the v2 query language. The OR terms are
            parenthesised so the retweet exclusion applies to both keywords
            rather than only to the last one.
        max_tweets: Upper bound on the number of tweets collected.
    """
    rows = []
    pages = tweepy.Paginator(
        client.search_recent_tweets,
        query=query,
        tweet_fields=['created_at', 'text', 'author_id'],
        max_results=100,  # page size per request; the total is capped below
    )
    for tweet in pages.flatten(limit=max_tweets):
        scores = sia.polarity_scores(tweet.text)
        rows.append([
            tweet.created_at, tweet.author_id, tweet.text,
            scores['compound'], scores['pos'], scores['neu'], scores['neg']
        ])

    # NOTE: the 'Username' column actually holds the numeric author_id; the
    # column name is kept so existing consumers of the CSV keep working.
    df = pd.DataFrame(rows, columns=['Date', 'Username', 'Tweet', 'Compound', 'Positive', 'Neutral', 'Negative'])
    df.to_csv("bitcoin_sentiment.csv", index=False)
    print("✅ Twitter data saved to bitcoin_sentiment.csv")

# Run the Twitter collection with the default query and tweet limit.
fetch_tweets()

## ✅ Dataset 5: Reddit Scraping
def scrape_reddit(subreddit_name="Bitcoin", limit=1000):
    """Dump a subreddit's "hot" listing to ``reddit_bitcoin.csv``.

    Each row holds a post's title, score, ``created_utc`` timestamp, and URL.
    The output filename is fixed and does not depend on *subreddit_name*.

    Args:
        subreddit_name: Subreddit to read, without the ``r/`` prefix.
        limit: Maximum number of posts requested from the hot listing.
    """
    hot_posts = reddit.subreddit(subreddit_name).hot(limit=limit)
    rows = [
        [submission.title, submission.score, submission.created_utc, submission.url]
        for submission in hot_posts
    ]

    frame = pd.DataFrame(rows, columns=["Title", "Score", "Timestamp", "URL"])
    frame.to_csv("reddit_bitcoin.csv", index=False)
    print("✅ Reddit data saved to reddit_bitcoin.csv")

# Run the Reddit collection with the default subreddit and post limit.
scrape_reddit()

## ✅ Dataset 6: Google Search Trends
def fetch_google(keyword="Bitcoin", timeframe="today 30-m"):
    """Save Google Trends interest-over-time for *keyword* to CSV.

    The result is written to ``google_trends_bitcoin.csv`` (index included);
    the filename is fixed and does not depend on *keyword*.

    Args:
        keyword: Single search term to query.
        timeframe: pytrends timeframe string passed through unchanged.
            NOTE(review): pytrends documents only 1, 3 and 12 month windows
            for the ``today #-m`` form — confirm ``"today 30-m"`` is accepted.
    """
    session = TrendReq(hl="en-US", tz=360)
    session.build_payload([keyword], cat=0, timeframe=timeframe, geo="", gprop="")

    interest = session.interest_over_time()
    # Drop the "isPartial" column when present so it is not written to the CSV.
    if "isPartial" in interest.columns:
        interest = interest.drop(columns=["isPartial"])

    interest.to_csv("google_trends_bitcoin.csv")
    print("✅ Google Trends data saved to google_trends_bitcoin.csv")

# Run the Google Trends collection with the default keyword and timeframe.
fetch_google()


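# Cell output (end of traceback):
#   AttributeError: 'FileFinder' object has no attribute 'find_module'
# NOTE(review): this usually means a dependency still calls the legacy
# importlib `find_module()` API, which was removed in Python 3.12 — confirm
# which package raises it in this environment.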