In [1]:
import pandas as pd
from datetime import datetime
import time
from deep_translator import GoogleTranslator
import pandas as pd
import numpy as np
import nest_asyncio
from bs4 import BeautifulSoup
import json
import random
import os
import requests
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio



# Reddit

## Emoji

In [2]:
# Sample dictionary for localizing translations and adding emojis
emoji_map = {
    # Greetings and Polite Words
    "你好": "👋",  # Hello
    "谢谢": "🙏",  # Thank you
    "请": "🙏",  # Please
    "吗": "❓",

    # Emotions
    "开心": "😄",  # Happy
    "难过": "😢",  # Sad
    "生气": "😡",  # Angry
    "害怕": "😨",  # Scared
    "爱": "❤️",  # Love
    "喜欢": "😍",  # Like
    "失望": "😞",  # Disappointed
    "激动": "🤩",  # Excited
    "累": "😴",  # Tired
    "困": "😪",  # Sleepy
    "无聊": "😐",  # Bored
    "惊讶": "😲",  # Surprised
    "笑": "😂",  # Laugh
    "哭": "😭",  # Cry

    # Actions
    "吃": "🍽️",  # Eat
    "喝": "🍻",  # Drink

    # Weather and Nature
    "晴天": "☀️",  # Sunny
    "下雨": "🌧️",  # Rain
    "雪": "❄️",  # Snow
    "风": "🌬️",  # Windy
    "花": "🌸",  # Flower
    "树": "🌳",  # Tree
    "山": "⛰️",  # Mountain
    "海": "🌊",  # Ocean
    "火": "🔥",  # Fire

    # Objects and Things
    "手机": "📱",  # Mobile phone
    "电脑": "💻",  # Computer
    "车": "🚗",  # Car
    "钱": "💰",  # Money
    "书": "📖",  # Book
    "礼物": "🎁",  # Gift
    "电视": "📺",  # TV

    # Food and Drink
    "苹果": "🍎",  # Apple
    "蛋糕": "🍰",  # Cake
    "冰淇淋": "🍦",  # Ice cream
    "咖啡": "☕",  # Coffee
    "啤酒": "🍺",  # Beer
    "牛奶": "🥛",  # Milk
    "茶": "🍵",  # Tea
    "面条": "🍜",  # Noodles

    # Places
    "学校": "🏫",  # School
    "家": "🏡",  # Home
    "公园": "🏞️",  # Park
    "医院": "🏥",  # Hospital
    "餐厅": "🍴",  # Restaurant
    "机场": "✈️",  # Airport
    "火车站": "🚉",  # Train station

    # Time
    "今天": "📅",  # Today
    "明天": "🗓️",  # Tomorrow
    "昨天": "🕰️",  # Yesterday
    "现在": "⌚",  # Now
    "时间": "⏰",  # Time

    # Miscellaneous
    "是的": "✅",  # Yes
    "不是": "❌",  # No
    "好": "👍",  # Good
    "坏": "👎",  # Bad
    "快": "🚀",  # Fast
    "慢": "🐢",  # Slow
    "问题": "❓",  # Question
    "答案": "💡",  # Answer
    "成功": "🏆",  # Success
    "失败": "💔",  # Failure
    "朋友": "👫",  # Friends
    "家人": "👨‍👩‍👧‍👦",  # Family
    "男朋友": "👨‍❤️‍👨",  # Boyfriend
    "女朋友": "👩‍❤️‍👩",  # Girlfriend
}


## Scrape & Translate

In [3]:
# Global variables to store DataFrames
ubc_reddit_df = pd.DataFrame()
uoft_reddit_df = pd.DataFrame()
uwaterloo_reddit_df = pd.DataFrame()

# Function to fetch Reddit data
def fetch_reddit_data(subreddit, pages=1):
    url = f'https://www.reddit.com/r/{subreddit}.json'
    headers = {'User-Agent': 'Mozilla/5.0 (compatible; Reddit Scraper/1.0)'}
    after = None  # Placeholder for the 'after' parameter
    all_posts = []
    
    try:
        for _ in range(pages):
            params = {'after': after} if after else {}
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()  # Raise an exception for HTTP errors
            data = response.json()
            
            # Extract relevant data from the JSON (only title and content/selftext)
            for post in data['data']['children']:
                post_data = post['data']
                all_posts.append({
                    'Title': post_data.get('title'),
                    'Content': post_data.get('selftext', '')  # Use empty string if selftext is missing
                })
            
            # Get the 'after' key to fetch the next page
            after = data['data'].get('after')
            
            if not after:  # No more pages to fetch
                break
        
        # Convert to DataFrame
        df = pd.DataFrame(all_posts)
        return df
    
    except Exception as e:
        print(f"Error fetching data from /r/{subreddit}: {e}")
        return pd.DataFrame()

# Function to save the DataFrame to an Excel file and keep it as a global variable
def save_to_excel(df, subreddit):
    global ubc_reddit_df, uoft_reddit_df, uwaterloo_reddit_df
    
    # Save DataFrame to Excel
    if not df.empty:
        file_name = f"reddit/{subreddit}_Contents_{datetime.now().strftime('%Y_%m')}.xlsx"
        df.to_excel(file_name, index=False)
        print(f"Data from /r/{subreddit} saved to {file_name}")
        
        # Store the DataFrame in the correct global variable based on the subreddit
        if subreddit == 'UBC':
            ubc_reddit_df = df
        elif subreddit == 'UofT':
            uoft_reddit_df = df
        elif subreddit == 'uwaterloo':
            uwaterloo_reddit_df = df
    else:
        print(f"No data to save for /r/{subreddit}.")

# Main script execution
if __name__ == "__main__":
    # Dictionary to map subreddits to the number of pages to fetch
    subreddits_to_pages = {
        'UBC': 2,
        'UofT': 2,
        'uwaterloo': 1
    }
    
    # Loop through the subreddits and fetch/save data
    for subreddit, pages in subreddits_to_pages.items():
        df = fetch_reddit_data(subreddit, pages=pages)
        
        # Apply iloc to remove specific rows after fetching the data
        if subreddit == 'UBC':
            df = df.iloc[3:]  # Drop the first 3 rows
        elif subreddit == 'UofT':
            df = df.iloc[1:]  # Drop the first row
        elif subreddit == 'uwaterloo':
            df = df.iloc[2:]  # Drop the first 2 rows
        
        # Save the modified DataFrame to an Excel file and global variable
        save_to_excel(df, subreddit)

# You can now access the dataframes as:
# ubc_reddit_df, uoft_reddit_df, uwaterloo_reddit_df


Data from /r/UBC saved to reddit/UBC_Contents_2024_11.xlsx
Data from /r/UofT saved to reddit/UofT_Contents_2024_11.xlsx
Data from /r/uwaterloo saved to reddit/uwaterloo_Contents_2024_11.xlsx


In [4]:
# Initialize Deep Translator (Google)
translator = GoogleTranslator(source='en', target='zh-CN')

# Function to translate text from English to Chinese and add emojis
def translate_to_chinese(text, for_title=True):
    if pd.isna(text) or not text.strip():
        return text  # Return original if text is empty or NaN
    
    try:
        # Translate to Chinese (Simplified)
        translated = translator.translate(text)
        
        # Localize translation by rephrasing and adding informal language (口语化)
        localized_translation = translated.replace('你', '你呀')

        # Determine where to add the emoji (beginning for titles, end for content)
        emoji_to_add = None
        for word, emoji in emoji_map.items():
            if word in localized_translation:
                emoji_to_add = emoji
                break  # Add the first matching emoji found
        
        if emoji_to_add:
            if for_title:
                # For titles, randomly decide to add at the beginning or end
                localized_translation = f"{emoji_to_add}{localized_translation}" if len(localized_translation) % 2 == 0 else f"{localized_translation}{localized_translation}"
            else:
                # For content, always add at the end
                localized_translation = f"{localized_translation} {emoji_to_add}"

        return localized_translation
    except Exception as e:
        print(f"Error translating text: {e}")
        return text  # Return original text if translation fails

# Function to translate and save DataFrames
def process_and_translate_df(df, output_file):
    # Apply translation to Title and Content columns
    df['Translated_Title'] = df['Title'].apply(lambda x: translate_to_chinese(x, for_title=True))
    df['Translated_Content'] = df['Content'].apply(lambda x: translate_to_chinese(x, for_title=False))
    df['Combined_Content'] = df['Translated_Title'].fillna('') + ' ' + df['Translated_Content'].fillna('')
    
    # Save the translated dataframe into a new Excel file
    df.to_excel(output_file, index=False)
    
    print(f"Translated data saved to {output_file}")

# Main function to process each DataFrame
def translate_and_save_all():
    global ubc_reddit_df, uoft_reddit_df, uwaterloo_reddit_df

    # List of DataFrames and corresponding output filenames
    dfs_to_process = [
        (ubc_reddit_df, 'reddit/ubc_translated.xlsx'),
        (uoft_reddit_df, 'reddit/uoft_translated.xlsx'),
        (uwaterloo_reddit_df, 'reddit/uwaterloo_translated.xlsx')
    ]
    
    # Loop over each DataFrame and process it
    for df, output_file in dfs_to_process:
        if not df.empty:
            process_and_translate_df(df, output_file)
        else:
            print(f"No data available in DataFrame for {output_file}")

# Call the function to translate and save all DataFrames
translate_and_save_all()


Translated data saved to reddit/ubc_translated.xlsx
Translated data saved to reddit/uoft_translated.xlsx
Translated data saved to reddit/uwaterloo_translated.xlsx


# Put the reddit scrape in excel

In [5]:

def create_daily_dataframe(ubc_reddit_df, uoft_reddit_df, uwaterloo_reddit_df, content_column='Combined_Content'):
    # Create the daily DataFrame with the specified columns
    columns = ['星期八', 'BE美学', '小家伙', '轻留同学2236', '寄情书', '我也无法忍受我', '幻听', 
               '可丽', '小狗驾到', '不为坏男人掉珍珠', '鱼丸汤', 'liny', '衍', 'nia', 'flimna']
    
    daily = pd.DataFrame(columns=columns)
    
    # Add the fixed second row
    fixed_row = ['多伦多大学', 'SC', '多伦多大学', 'SC2', '多伦多', 'Academy - tor', '多伦多大学', 
                 'SC3 - torfood', '多伦多大学', 'SC4 - tornearby', '温哥华', '中学 - van', 
                 '温哥华', '中学2 - vanfood', '温哥华', 'UBC', '温哥华', 'UBC2', '温哥华', 
                 'UBC3 vannearby', '纽芬兰 - ott', '纽芬兰2 - tv', '纽芬兰3 - ha', '纽芬兰4 - book', 
                 '滑铁卢', '劳瑞儿']
    daily.loc[0] = fixed_row[:len(columns)]  # Ensure it fits the number of columns
    
    # Determine the number of rows to create (use the max length of input DataFrames)
    max_rows = max(len(ubc_reddit_df), len(uoft_reddit_df), len(uwaterloo_reddit_df))
    
    # Create empty rows
    for i in range(1, max_rows + 1):  # +1 to account for the fixed row
        daily.loc[i] = [''] * len(columns)
    
    # Reset index for input DataFrames to ensure we start from 0
    ubc_reddit_df = ubc_reddit_df.reset_index(drop=True)
    uoft_reddit_df = uoft_reddit_df.reset_index(drop=True)
    uwaterloo_reddit_df = uwaterloo_reddit_df.reset_index(drop=True)
    
    # Populate the DataFrame
    ubc_index, uoft_index, uwaterloo_index = 0, 0, 0
    for i in range(1, max_rows + 1):  # Start from 1 to skip the fixed row
        # UBC content (可丽 and 小狗驾到)
        if ubc_index < len(ubc_reddit_df):
            daily.loc[i, '可丽'] = ubc_reddit_df.loc[ubc_index, content_column]
            if ubc_index + 1 < len(ubc_reddit_df):
                daily.loc[i, '小狗驾到'] = ubc_reddit_df.loc[ubc_index + 1, content_column]
            ubc_index += 2
        
        # UofT content (星期八 and BE美学)
        if uoft_index < len(uoft_reddit_df):
            daily.loc[i, '星期八'] = uoft_reddit_df.loc[uoft_index, content_column]
            if uoft_index + 1 < len(uoft_reddit_df):
                daily.loc[i, 'BE美学'] = uoft_reddit_df.loc[uoft_index + 1, content_column]
            uoft_index += 2
        
        # Waterloo content (flimna)
        if uwaterloo_index < len(uwaterloo_reddit_df):
            daily.loc[i, 'flimna'] = uwaterloo_reddit_df.loc[uwaterloo_index, content_column]
            uwaterloo_index += 1
    
    return daily

# Usage:
# Assuming you have ubc_reddit_df, uoft_reddit_df, and uwaterloo_reddit_df already loaded
daily_df = create_daily_dataframe(ubc_reddit_df, uoft_reddit_df, uwaterloo_reddit_df, content_column='Combined_Content')

# # Save the result to an Excel file
daily_df.to_excel('daily_df.xlsx', index=False)


In [6]:
daily_df.head()

Unnamed: 0,星期八,BE美学,小家伙,轻留同学2236,寄情书,我也无法忍受我,幻听,可丽,小狗驾到,不为坏男人掉珍珠,鱼丸汤,liny,衍,nia,flimna
0,多伦多大学,SC,多伦多大学,SC2,多伦多,Academy - tor,多伦多大学,SC3 - torfood,多伦多大学,SC4 - tornearby,温哥华,中学 - van,温哥华,中学2 - vanfood,温哥华
1,多伦多大学将博士生基本资助增加至每年 4 万美元 “从明年秋季开始，所有博士和法学博士项目资...,悉尼史密斯学院举办“学生抗癌烘焙义卖”活动 伙计们，自制的饼干太好吃了！快来吧！ 🍽️,,,,,,计算机科学计划危机 我是一名新生，我已经接受了这样一个事实，即我可能在第二年无法进入计算机科...,❓有人知道这是什么吗？ 如果这个之前发布过的话，我很抱歉，但我在巢穴里发现了这个，我很好奇是...,,,,,,🚗E7 钢琴通过钻通孔锁定自行车锁
2,⌚好了，伙计们，从现在开始我要锁定目标了，不能再偷懒了 我不敢相信自己会在凌晨 3 点写这篇...,这个光彩夺目的怪异“O”成了我朋友之间的一个梗这个光彩夺目的怪异“O”成了我朋友之间的一个梗,,,,,,🙏请远离建筑物吸烟 根据法律规定，如果您吸烟，您必须距离建筑物 6 米。特别是在住所，请多加...,❓艺术合作问题 大家好！我即将结束我在 Arts Co-op 的第二个工作期，有几个问题：\...,,,,,,我讨厌 eduroam 我讨厌 eduroam 我讨厌 eduroam 我讨厌 eduroa...
3,我制作了一个成绩计算器应用程序，因为 Excel 表格太麻烦了 [https://cours...,😍您最喜欢在校园周围哪些适合学习的咖啡馆？ 我总是去 bloom 或 lait night，...,,,,,,HARVEST 积极欺骗其客户——除非你呀想为你呀没有买的东西付钱，否则不要在那里购物HAR...,永志难忘。,,,,,,@csuhotsxfyvz 被发现缺乏哈哈
4,研究生会基础资助委员会关于增加资助的声明 从学生组织角度对资金增加的最新情况进行更新。,被停职的哥伦比亚大学教授在国王学院举行犹太复国主义集会,,,,,,“大学经历”：它是什么？ 在我们短暂的读书休息期间看到人们去旅行（在 BC 省、加拿大、国外...,💪让我们渡过难关💪 [https://www.youtube.com/watch?v=BET...,,,,,,❓我该退出我的第六个合作社吗？ 大家好，\n\n我正在进行第 6 个实习期（已经完成了计算机...


# Red

In [7]:
# Initialize global DataFrames
xhs_tor = pd.DataFrame()
xhs_torfood = pd.DataFrame()
xhs_tornearby = pd.DataFrame()

xhs_van = pd.DataFrame()
xhs_vanfood = pd.DataFrame()
xhs_vannearby = pd.DataFrame()

xhs_ott = pd.DataFrame()
xhs_ha = pd.DataFrame()

xhs_tv = pd.DataFrame()
xhs_book = pd.DataFrame()

In [8]:
# Allow nested event loops
nest_asyncio.apply()

# Function to load cookies from a file
def load_cookies_from_file(file_path):
    with open(file_path, "r") as file:
        return json.load(file)

# Function to set cookies in the Playwright page
async def set_cookies(page, cookies):
    await page.context.add_cookies(cookies)

# Function to save images with error checking
def save_image(image_url, save_dir, image_name):
    response = requests.get(image_url)
    if response.status_code == 200:
        file_path = os.path.join(save_dir, image_name)
        try:
            with open(file_path, "wb") as file:
                file.write(response.content)
            print(f"Image saved to {file_path}")
            return file_path
        except Exception as e:
            print(f"Failed to save image to {file_path}. Error: {e}")
            return ""
    else:
        print(f"Failed to download image from {image_url}, HTTP Status Code: {response.status_code}")
        return ""

# Function to attempt loading a page with retries
async def goto_with_retry(page, url, retries=3, timeout=10000000):
    for attempt in range(retries):
        try:
            await page.goto(url, timeout=timeout)
            print(f"Successfully loaded: {url}")
            return
        except PlaywrightTimeoutError:
            print(f"Attempt {attempt + 1} failed: Timeout while loading {url}")
            if attempt == retries - 1:
                raise
            print("Retrying...")

In [9]:
# Main scraping function
async def scrape_with_playwright(url, save_file, images_dir, df_ref):
    global xhs_tor, xhs_torfood, xhs_tornearby, xhs_van, xhs_vanfood, xhs_vannearby, xhs_ott, xhs_ha, xhs_tv, xhs_book # Declare globals

    async with async_playwright() as p:
        browser = await p.chromium.launch(channel="chrome", headless=True)
        context = await browser.new_context()
        page = await context.new_page()

        cookies = load_cookies_from_file("xhs_crawler/Testing/cookie.json")
        await set_cookies(page, cookies)

        await goto_with_retry(page, url)
        await page.wait_for_timeout(random.randint(5000, 10000))

        data = []
        os.makedirs(images_dir, exist_ok=True)

        for _ in range(15):
            element = await page.locator(
                "#global > div.main-container > div.with-side-bar.main-content > div > div.feeds-container"
            ).element_handle(timeout=10000000)

            soup = BeautifulSoup(await element.inner_html(), "lxml")

            notes = soup.select(".note-item[data-width]")
            for note in notes[:15]:
                title_element = note.select_one(".title > span")
                title = title_element.get_text() if title_element else ""
                note_link = "https://www.xiaohongshu.com" + note.select_one(".cover").get("href")

                await goto_with_retry(page, note_link)
                await page.wait_for_timeout(random.randint(2000, 5000))

                note_content_html = await page.content()
                note_soup = BeautifulSoup(note_content_html, "lxml")
                note_content = note_soup.select_one("#detail-desc .note-text").get_text(strip=True) if note_soup.select_one("#detail-desc .note-text") else ""

                additional_images = note_soup.select(".note-slider-img")[:4]
                additional_image_paths = []
                for idx, img in enumerate(additional_images):
                    img_url = img["src"]
                    image_path = save_image(img_url, images_dir, f"{title.replace(' ', '_')}_image_{idx + 1}.jpg")
                    additional_image_paths.append(image_path)
                
                data.append({
                    "title": title,
                    "note_content": note_content,
                })

            if len(data) >= 15:
                break

        new_df = pd.DataFrame(data)
        
        # Update the correct global DataFrame
        if df_ref is xhs_tor:
            xhs_tor = pd.concat([xhs_tor, new_df], ignore_index=True)
        elif df_ref is xhs_torfood:
            xhs_torfood = pd.concat([xhs_torfood, new_df], ignore_index=True)
        elif df_ref is xhs_tornearby:
            xhs_tornearby = pd.concat([xhs_tornearby, new_df], ignore_index=True)
        # van
        elif df_ref is xhs_van:
            xhs_van = pd.concat([xhs_van, new_df], ignore_index=True)
        elif df_ref is xhs_vanfood:
            xhs_vanfood = pd.concat([xhs_vanfood, new_df], ignore_index=True)
        elif df_ref is xhs_vannearby:
            xhs_vannearby = pd.concat([xhs_vannearby, new_df], ignore_index=True)
        # ott ha
        elif df_ref is xhs_ott:
            xhs_ott = pd.concat([xhs_ott, new_df], ignore_index=True)
        elif df_ref is xhs_ha:
            xhs_ha = pd.concat([xhs_ha, new_df], ignore_index=True)
        # tv book
        elif df_ref is xhs_tv:
            xhs_tv = pd.concat([xhs_tv, new_df], ignore_index=True)
        elif df_ref is xhs_book:
            xhs_book = pd.concat([xhs_book, new_df], ignore_index=True)
        
        # Save the updated DataFrame to Excel
        if df_ref is xhs_tor:
            xhs_tor.to_excel(save_file, index=False)
        elif df_ref is xhs_torfood:
            xhs_torfood.to_excel(save_file, index=False)
        elif df_ref is xhs_tornearby:
            xhs_tornearby.to_excel(save_file, index=False)
        #  van
        elif df_ref is xhs_van:
            xhs_van.to_excel(save_file, index=False)
        elif df_ref is xhs_vanfood:
            xhs_vanfood.to_excel(save_file, index=False)
        elif df_ref is xhs_vannearby:
            xhs_vannearby.to_excel(save_file, index=False)
        # ott ha
        elif df_ref is xhs_ott:
            xhs_ott.to_excel(save_file, index=False)
        elif df_ref is xhs_ha:
            xhs_ha.to_excel(save_file, index=False)
        #  tv book
        elif df_ref is xhs_tv:
            xhs_tv.to_excel(save_file, index=False)
        elif df_ref is xhs_book:
            xhs_book.to_excel(save_file, index=False)
        
        
        print(f"Data saved to {save_file}")
        await browser.close()

# Run the scraper for multiple URLs
async def run_scrapers():
    global xhs_tor, xhs_torfood, xhs_tornearby, xhs_van, xhs_vanfood, xhs_vannearby, xhs_ott, xhs_ha, xhs_tv, xhs_book

    urls = [
        # tor
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E5%25A4%259A%25E4%25BC%25A6%25E5%25A4%259A&source=web_explore_feed",
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/tor.xlsx",
         "images_dir": "tor_img",
         "df_ref": xhs_tor},

        # tor_food
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E5%25A4%259A%25E4%25BC%25A6%25E5%25A4%259A%25E7%25BE%258E%25E9%25A3%259F&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/torfood.xlsx", 
         "images_dir": "torfood_img",
         "df_ref": xhs_torfood}, 

        #  tor_nearby
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E5%25A4%259A%25E4%25BC%25A6%25E5%25A4%259A%25E5%2591%25A8%25E8%25BE%25B9&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/tornearby.xlsx", 
         "images_dir": "tornearby_img",
          "df_ref": xhs_tornearby}, 

        #  van
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E6%25B8%25A9%25E5%2593%25A5%25E5%258D%258E&source=web_search_result_notes", 
        "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/van.xlsx", 
        "images_dir": "van_img",
        "df_ref": xhs_van}, 

        #  van_food
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E6%25B8%25A9%25E5%2593%25A5%25E5%258D%258E%25E7%25BE%258E%25E9%25A3%259F&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/vanfood.xlsx", 
        "images_dir": "vanfood_img",
        "df_ref": xhs_vanfood}, 

        #  van_nearby
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E6%25B8%25A9%25E5%2593%25A5%25E5%258D%258E%25E5%2591%25A8%25E8%25BE%25B9&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/vannearby.xlsx", 
        "images_dir": "van_nearby.img",
        "df_ref": xhs_vannearby}, 

        # ott
         {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E6%25B8%25A5%25E5%25A4%25AA%25E5%258D%258E&source=web_search_result_notes", 
        "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/ott.xlsx", 
        "images_dir": "ott_img",
        "df_ref": xhs_ott}, 

         #  halifx
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E5%2593%2588%25E6%25B3%2595%25E7%25BE%258E%25E9%25A3%259F&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/ha.xlsx", 
        "images_dir": "ha_img",
        "df_ref": xhs_ha}, 

         #  tv
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E7%2594%25B5%25E8%25A7%2586%25E7%2594%25B5%25E5%25BD%25B1%25E6%258E%25A8%25E8%258D%2590&source=web_search_result_notes", 
         "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/tv.xlsx", 
         "images_dir": "tv_img",
        "df_ref": xhs_tv}, 
    
        # book
        {"url": "https://www.xiaohongshu.com/search_result?keyword=%25E8%258B%25B1%25E6%2596%2587%25E4%25B9%25A6%25E6%258E%25A8%25E8%258D%2590&source=web_search_result_notes", 
        "save_file": "/Users/xiaofanjiao/Desktop/QL AI/xhs_crawler/scraped/book.xlsx", 
        "images_dir": "book_img",
        "df_ref": xhs_book} 
        ]

    for entry in urls:
        await scrape_with_playwright(entry["url"], entry["save_file"], entry["images_dir"], entry["df_ref"])

# Run the complete scraping process
if __name__ == "__main__":
    asyncio.run(run_scrapers())

Successfully loaded: https://www.xiaohongshu.com/search_result?keyword=%25E5%25A4%259A%25E4%25BC%25A6%25E5%25A4%259A&source=web_explore_feed
Successfully loaded: https://www.xiaohongshu.com/search_result/67337b74000000001a01efc9?xsec_token=AB30tSRBv6-_C5U1YR9lT3dJzSNLFW6_HBq8tk2qNRCbI=&xsec_source=pc_search
Image saved to tor_img/多伦多酒厂圣诞集市明天周三回归🎄附攻略_image_1.jpg
Image saved to tor_img/多伦多酒厂圣诞集市明天周三回归🎄附攻略_image_2.jpg
Image saved to tor_img/多伦多酒厂圣诞集市明天周三回归🎄附攻略_image_3.jpg
Image saved to tor_img/多伦多酒厂圣诞集市明天周三回归🎄附攻略_image_4.jpg
Successfully loaded: https://www.xiaohongshu.com/search_result/67337007000000003c01b714?xsec_token=AB30tSRBv6-_C5U1YR9lT3dDQdZHDAc4rdaSfWjmv6nEU=&xsec_source=pc_search
Image saved to tor_img/🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食_image_1.jpg
Image saved to tor_img/🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食_image_2.jpg
Image saved to tor_img/🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食_image_3.jpg
Image saved to tor_img/🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食_image_4.jpg
Successfully loaded: https://www.xiaohongshu.com/search_result/6732a64c00000000

# Edit

In [10]:
df_list = [xhs_tor, xhs_torfood, xhs_tornearby
        , xhs_van, xhs_vanfood, xhs_vannearby
        , xhs_ott, xhs_ha, xhs_tv, xhs_book
        ]

# Loop through each DataFrame and combine the 'title' and 'note_content' columns
for df in df_list:
    df['combined_content'] = df['title'].fillna('') + ' ' + df['note_content'].fillna('')

In [11]:
xhs_torfood

Unnamed: 0,title,note_content,combined_content
0,🇨🇦多伦多中餐馆地图-四大中餐美食圈,都说多伦多是北美中餐天花板，这张地图总结了多伦多150+家知名中餐馆，八大菜系，分布在四大美...,🇨🇦多伦多中餐馆地图-四大中餐美食圈 都说多伦多是北美中餐天花板，这张地图总结了多伦多150...
1,🇨🇦不能错过的10+多伦多DT中餐美食合集😋,多伦多不愧是北美中餐天花板，好吃的那是一个多啊，真的太适合中国宝宝体质了！我整理的downt...,🇨🇦不能错过的10+多伦多DT中餐美食合集😋 多伦多不愧是北美中餐天花板，好吃的那是一个多啊...
2,🇨🇦在多伦多喝早茶还得是广州人带路！！,老廣推薦🇨🇳\n周末就喜欢喝家里人一起喝早茶🍵聊聊天\n早茶真的是老广们的蕞爱\n开了十几年...,🇨🇦在多伦多喝早茶还得是广州人带路！！ 老廣推薦🇨🇳\n周末就喜欢喝家里人一起喝早茶🍵聊聊天...
3,多伦多苍蝇馆子｜排队1小时+依旧觉得很值🌶️,周五快乐呀，朋友们🌶️\n坠近去吃了dt爆火的苍蝇馆子🔥据说和拈一筷子是一个老板开的😋虽然我...,多伦多苍蝇馆子｜排队1小时+依旧觉得很值🌶️ 周五快乐呀，朋友们🌶️\n坠近去吃了dt爆火的...
4,🇨🇦 多伦多美食｜24小时餐厅推荐,临近年底大家是不是都变得忙碌起来了，打工人的年末加班，学生党的final赶due。常常忙的没...,🇨🇦 多伦多美食｜24小时餐厅推荐 临近年底大家是不是都变得忙碌起来了，打工人的年末加班，学...
5,🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食,👏多伦多市中心一年一度的彩灯节Cavalcade of Lights即将在这个月底正式开启～...,🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食 👏多伦多市中心一年一度的彩灯节Cavalcade ...
6,🇨🇦7天座无虚席🔥半个多伦多都跑你这了吧…,杨三嬢跷脚牛肉啊你该扩Zhang了……\n你这种实力派…实惠好吃又接地气\n要知道你不能让爱...,🇨🇦7天座无虚席🔥半个多伦多都跑你这了吧… 杨三嬢跷脚牛肉啊你该扩Zhang了……\n你这种...
7,多伦多｜我愿称之为加拿大的美食天堂‼️,记录一次温哥华✈️多伦多的美食之旅，本以为温哥华已经是美食云集了，没想到多伦多才是🇨🇦的美食...,多伦多｜我愿称之为加拿大的美食天堂‼️ 记录一次温哥华✈️多伦多的美食之旅，本以为温哥华已经...
8,🇨🇦UofT/多伦多去哪吃饭？！超详细绿色无广,如题，绿色无广，有图有真相\n都是自己付钱一口口尝出来滴，放心看就对了！按评分高到低排序。（...,🇨🇦UofT/多伦多去哪吃饭？！超详细绿色无广 如题，绿色无广，有图有真相\n都是自己付钱一...
9,🇨🇦多伦多今年的成都美食黑马出现了！🔥🌶️,这家新开的成都冒菜馆就这样水灵灵地出现在YF啦！最近气温都在逐渐下降，急需来份冒菜暖暖身子店...,🇨🇦多伦多今年的成都美食黑马出现了！🔥🌶️ 这家新开的成都冒菜馆就这样水灵灵地出现在YF啦！...


In [12]:
# Function to match columns and assign content starting from row 2 (index 1) while handling length mismatch
def assign_content_to_main_df(main_df, source_df, main_column, source_column):
    # Calculate the number of rows to copy, limited by the smaller DataFrame
    num_rows = min(len(source_df), len(main_df) - 1)  # Account for starting at index 1
    
    # Insert content from source_df into main_df starting from row 2 (index 1)
    main_df.loc[1:1 + num_rows - 1, main_column] = source_df.loc[:num_rows - 1, source_column].values


# Now, assign content from the smaller DataFrames to the respective columns in daily_df_condense

# Assign from xhs_tor to daily_df_condense
assign_content_to_main_df(daily_df, xhs_tor, '小家伙', 'combined_content')
assign_content_to_main_df(daily_df, xhs_torfood, '轻留同学2236', 'combined_content')
assign_content_to_main_df(daily_df, xhs_tornearby, '寄情书', 'combined_content')

assign_content_to_main_df(daily_df, xhs_van, '我也无法忍受我', 'combined_content')
assign_content_to_main_df(daily_df, xhs_vanfood, '幻听', 'combined_content')
assign_content_to_main_df(daily_df, xhs_vannearby, '不为坏男人掉珍珠', 'combined_content')

assign_content_to_main_df(daily_df, xhs_ott, '鱼丸汤', 'combined_content')
assign_content_to_main_df(daily_df, xhs_ha, 'liny', 'combined_content')

assign_content_to_main_df(daily_df, xhs_tv, '衍', 'combined_content')
assign_content_to_main_df(daily_df, xhs_book, 'nia', 'combined_content')

In [13]:
daily_df

Unnamed: 0,星期八,BE美学,小家伙,轻留同学2236,寄情书,我也无法忍受我,幻听,可丽,小狗驾到,不为坏男人掉珍珠,鱼丸汤,liny,衍,nia,flimna
0,多伦多大学,SC,多伦多大学,SC2,多伦多,Academy - tor,多伦多大学,SC3 - torfood,多伦多大学,SC4 - tornearby,温哥华,中学 - van,温哥华,中学2 - vanfood,温哥华
1,多伦多大学将博士生基本资助增加至每年 4 万美元 “从明年秋季开始，所有博士和法学博士项目资...,悉尼史密斯学院举办“学生抗癌烘焙义卖”活动 伙计们，自制的饼干太好吃了！快来吧！ 🍽️,多伦多酒厂圣诞集市明天周三回归🎄附攻略 要说多伦多最著名的圣诞集市，一定有古酿酒厂区它的名字...,🇨🇦多伦多中餐馆地图-四大中餐美食圈 都说多伦多是北美中餐天花板，这张地图总结了多伦多150...,多倫多周邊必訪冬季浪漫小鎮！超有氛圍感 📍 Bracebridge\n這座位於Muskoka...,再见温哥华：离开前的美食合集 准备回国了，记录一下今年吃到喜欢的美食\n\t\nJust P...,从美国来温哥华 local带我吃的20家店 从尔湾中餐荒漠来温哥华找闺蜜玩啦！\n温哥华真的...,计算机科学计划危机 我是一名新生，我已经接受了这样一个事实，即我可能在第二年无法进入计算机科...,❓有人知道这是什么吗？ 如果这个之前发布过的话，我很抱歉，但我在巢穴里发现了这个，我很好奇是...,👌看图就懂｜温哥华景点分布指南＋攻略 还没有来过温哥华吗？公园、岛屿、海滩、峡谷、原野在这座...,渥太华美食｜我好爱Byward集市！超好吃好逛 宝藏Market，三天有好几顿在这吃，露天的...,多伦多老饕勇闯哈利法克斯美食圈｜哈法美食 嗨朋友们，这里是刚刚从哈法回来的奇奇。这次奇奇又吃...,傲娇女明星❌天才翻译官❗这搭配绝了❗ 剧名：#这爱情可以翻译吗主演:#金宣虎#高允真人间甜豆...,🔥火爆的10本欧美英文书 | 必读推介！ 1️⃣ Atomic Habits\n作者｜詹姆斯...,🚗E7 钢琴通过钻通孔锁定自行车锁
2,⌚好了，伙计们，从现在开始我要锁定目标了，不能再偷懒了 我不敢相信自己会在凌晨 3 点写这篇...,这个光彩夺目的怪异“O”成了我朋友之间的一个梗这个光彩夺目的怪异“O”成了我朋友之间的一个梗,🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食 👏多伦多市中心一年一度的彩灯节Cavalcade ...,🇨🇦不能错过的10+多伦多DT中餐美食合集😋 多伦多不愧是北美中餐天花板，好吃的那是一个多啊...,多伦多周边也有自己的奈良鹿公园😍 标题党了一下，虽然这个鹿公园是有围栏隔开的，但是这里真的有...,👌看图就懂｜温哥华景点分布指南＋攻略 还没有来过温哥华吗？公园、岛屿、海滩、峡谷、原野在这座...,🇨🇦人生建议：吃完这些再离开温哥华 来温哥华玩了一个星期\n打卡了xhs上比较火的一些餐厅～...,🙏请远离建筑物吸烟 根据法律规定，如果您吸烟，您必须距离建筑物 6 米。特别是在住所，请多加...,❓艺术合作问题 大家好！我即将结束我在 Arts Co-op 的第二个工作期，有几个问题：\...,🇨🇦大温十一月出游攻略 圣诞🎄专辑 🎄圣诞集市\n近百个圣诞小屋🏠可以逛！还有圣诞灯树、旋转...,多伦多➡️渥太华Ottawa🇨🇦攻略｜旅游Live 欢迎来到加拿大首都——渥太华🇨🇦Otta...,🇨🇦小城foodie留学生｜一篇带你吃完哈法（上 ⭐️Ko-Doraku\n少见的不是亚洲融...,后劲太大了❗️边笑边哭的爱情电影😭 🎬名： We live in time\n\t\n北美正...,2024上半年治愈书单｜每一本都巨好看的合集 24年已经过去一半啦，为大家的书单上再添几本超...,我讨厌 eduroam 我讨厌 eduroam 我讨厌 eduroam 我讨厌 eduroa...
3,我制作了一个成绩计算器应用程序，因为 Excel 表格太麻烦了 [https://cours...,😍您最喜欢在校园周围哪些适合学习的咖啡馆？ 我总是去 bloom 或 lait night，...,刚吃完！！谁敢相信这是24年多伦多物价！ 太震惊了家人们！谁懂随机走进一家粤菜馆，吃到了多伦...,🇨🇦在多伦多喝早茶还得是广州人带路！！ 老廣推薦🇨🇳\n周末就喜欢喝家里人一起喝早茶🍵聊聊天...,🇨🇦多伦多霉霉周边 不说9点才开始排队嘛\n8:50到的时候就好多人了#霉霉#taylor霉...,11月16日 温哥华手工艺假日快闪市场回归 温哥华的假日购物季即将到来！备受欢迎的Vanco...,温哥华美食地图更新！虾图人二刷Richmond 又在大温暴风吸入了一周，美食地图更新如下：糖...,HARVEST 积极欺骗其客户——除非你呀想为你呀没有买的东西付钱，否则不要在那里购物HAR...,永志难忘。,🇨🇦温哥华周边｜一起泡汤啦～♨️ 雨天约会好去处，当然泡温泉是首选啦～不太想跑whistle...,渥太华｜家人们🥟我在饺子店吃到了黑暗料理 它家为什么生意不好？去了几次都没有人，你们都快去试...,Bicycle Thief是我在Halifax最爱的餐厅！ 因为他家的菜随便点都没有雷\n区...,新片上线‼️我那从天而降的高富帅男友🥳好看 片名：《刨根问底》类型：爱情 剧情上线时间：20...,美国豆瓣goodreads✅近十年受欢迎书籍top76 #浪漫生活的记录者全球蕞大书评网站g...,@csuhotsxfyvz 被发现缺乏哈哈
4,研究生会基础资助委员会关于增加资助的声明 从学生组织角度对资金增加的最新情况进行更新。,被停职的哥伦比亚大学教授在国王学院举行犹太复国主义集会,多伦多DT超大份日韩猪排店 📍Lucky Donkatsu - 415 Yonge St U...,多伦多苍蝇馆子｜排队1小时+依旧觉得很值🌶️ 周五快乐呀，朋友们🌶️\n坠近去吃了dt爆火的...,🇨🇦多伦多包车｜冬天的布鲁斯可能比夏天美 多伦多周边必打卡的景点除了尼亚加拉瀑布，我觉得就是...,温哥华探店｜下班后日式小酒馆见🍻Guu Guu with Garlic\n📍1698 Rob...,🇨🇦温哥华干饭记录📝 来签证一周吃了点啥 在温哥华remote工作了一周 好吃的太多了 西雅...,“大学经历”：它是什么？ 在我们短暂的读书休息期间看到人们去旅行（在 BC 省、加拿大、国外...,💪让我们渡过难关💪 [https://www.youtube.com/watch?v=BET...,2h可达！🇨🇦温哥华6个宝藏小镇！ ❶Steveston Village\n🚙 20分钟\n...,🇨🇦Costco渥太华巴屯11.11-11.17特价 本周折扣来袭#渥太华房地产#安省Cos...,挑战在哈法吃100家餐厅24/100 #挑战在哈法吃100家餐厅今天又是把我的宝藏餐厅拿出来...,狼王贵公子❌娇俏狼公主🔥直接甜晕算了！ 💫被2024全新玛丽苏奇幻爱情片《Forbidden...,超好看！这本绝对会成为经典👍🏻 #我的私人书单📚 All the Light We Cann...,❓我该退出我的第六个合作社吗？ 大家好，\n\n我正在进行第 6 个实习期（已经完成了计算机...
5,❓在校园里开一辆汉堡餐车，你呀会买吗？ 我是一名本科生……基本上吃过所有餐车上的沙威玛。我正...,❓有人注意到校园里缺少厨房和小厨房吗？ 有人知道校园里有哪些可以开放或预订的厨房空间吗？我敢...,巨型友谊手链也能交换！概念好绝！ 加拿大多伦多即将举办时代巡演的体育场在门口也挂上了巨型友谊...,🇨🇦 多伦多美食｜24小时餐厅推荐 临近年底大家是不是都变得忙碌起来了，打工人的年末加班，学...,多伦多周边2h可达｜7个宝藏小镇超治愈💐 𝐁𝐚𝐲𝐟𝐢𝐞𝐥𝐝\n🚗多伦多出发2.5h\n从城市...,温哥华的秋天🍂 isfj才会懂得一些瞬间#生活需要分享欲#独居女孩#温哥华生活#小猫日记#i...,温哥华美食｜列治文公共市场超全美食合集 在温哥华上班的日子中午没事就跑去公共市场吃吃喝喝，这...,安德斯·克拉尔玻璃公司 现在上 kraal 的课，我不明白为什么这么夸张。我在 reddit...,Phil120 帮助/导师 有人参加过 phil120 并且取得了好成绩吗？我本不应该参加这...,🇨🇦在温哥华周边！Mission小镇让你秒回瑞士 #旅游Mission米逊真是个宝藏小镇\n...,🇨🇦渥太华一日游攻略（轻松版） 1️⃣第一站 National Gallery of Can...,哈法🇨🇦 吃喝七天推荐给大家的9间餐厅 上 1️⃣ Evan’s Fresh Seafood...,Netflix新片‼️尺度炸裂💥结局反转太绝了😱 好久没看过这么有意思的犯罪喜剧片了🤣\n“...,✨我宣布，這是今年讀過最治癒的一本書💞 #浪漫生活的记录者In five years | 4...,❓之前有人换过 cs 吗？ 不确定我是否真的喜欢做计算机科学，所以我一直想放弃这个专业。有这...
6,考虑在米切纳大学参加联合项目，寻求见解 大家好，\n\n我是约克大学运动机能学专业的二年级学...,UTAPS 问题重新评估秋季学期和冬季学期UTAPS 问题重新评估秋季学期和冬季学期 这是我...,圣诞气氛越来越浓了～GTA本周活动上新 #圣诞节活动#不辜负每个周末1️⃣ Bass Pro...,🇨🇦多伦多冬季特别活动🆓彩灯➕表演➕美食 👏多伦多市中心一年一度的彩灯节Cavalcade ...,🇨🇦多伦多周边秋游合集｜5大好去处📍 今年去了好几个地方感受秋天的气息～ 体验下来都很不错来...,🇨🇦温哥华靶场打枪详细攻略（无需枪证）💥 温哥华雨季到来\n周末不知道玩什么建议一定去一次室...,温哥华日料又上新了Toyokan Bowl🏮 🎳10.8刚刚新鲜出炉的日料店，看到是New ...,🏫ubc 的学术要求是否比其他排名前 50 的学校更严格？ 我很快就要申请研究生院了，我想知...,UBC 提供价格低于 5 美元的实惠餐食,温哥华陪伴师 ❤️ 温哥华周边一日游 Mission+Fort Langley\n\t\n是...,🇨🇦渥太华🍖探到一家不可错过的土耳其烤肉 【Tava Turkish Cuisine】\n📍...,太好了哈法又多一家早茶 忍不住点了梅子烧鸭\n点的每一个都好吃\n鱿鱼须现炸的非常酥脆甚至来...,强推❗❗年下忠犬少年✖️阳光少女 出自剧：听我说（duy beni）\n\t\n上映日期：2...,书单| 25 本书让你掌握5项核心能力 Library Mindset 推荐的这 25 本书...,我制作了一个成绩计算器应用程序，因为 Excel 表格太麻烦了 [https://cours...
7,我需要建议是否申请延迟提款并且不能使用 c/nc 请帮忙我需要建议我需要建议是否申请延迟提款...,👍我已为 CHM135 做好准备，需要 CHM136 扫描（字符限制） 我刚刚丢掉了整个实验...,"🇨🇦多伦多新派越南菜天花板让我找到了❗️ 跟朋友聚餐，来试了一下新派越南菜La Sen, 这...",🇨🇦7天座无虚席🔥半个多伦多都跑你这了吧… 杨三嬢跷脚牛肉啊你该扩Zhang了……\n你这种...,🇨🇦多伦多自己的丹霞地貌🍁徒步赏秋好去处 想去周边小镇逛逛+找个简单的trail走走，搜到了...,🇨🇦温哥华的雨季，海是蓝不了一点，灰蒙蒙 能耐受大半年阴雨天气的人，应该都有强大的抗抑郁能力...,🇨🇦温哥华｜中餐大满足之旅🥣 在温哥华呆了两三天，去之前搜遍了小红书上推荐的中餐，挑了几家最...,操期中作业 这一项作业让我抓狂，我决定尽力完成它，拿到 30-40% 的成绩，然后享受我的休...,如何学习253？ Giuseppe 教授的任何课程都几乎没用，所以我基本上只能自学。有人在 ...,温哥华周边｜Mission小镇一日游🥳 难得不下雨的周末，临时决定来Mission溜达溜达\...,渥太华周边-🇺🇸雪城买菜游 作为生活在🇨🇦的trader joe‘s忠实粉丝，乐于折腾的我已...,🇨🇦Halifax炭火烧烤打卡✓ 🍠最近刷到pyq推荐这家炭火烧烤hin好吃🔥所以上周末就去...,强烈推荐🔥｜36部必看的高分经典电影🎬 电影的魅力在于能激发我们的想象力引发共鸣\n并让我们...,最受🇺🇸中学生欢迎的图书📚👨‍🎓 🇺🇸适合初中生的最佳书籍，这些是很受这个年龄段孩子欢迎的书...,香肠是怎么做成的？艺术 140 观点 这门课听起来像个笑话。我只是想问一下，是否有人上过这门...
8,🙏需要有关 dlsph mph 健康促进流程的建议，请帮忙 大家好，我想向那些收到 uoft...,我计划本周六和周日步行去滑铁卢大学，有人想加入吗？我计划本周六和周日步行去滑铁卢大学，有人想...,多伦多midtown 漂亮brunch｜Mia Brunch Bar 📍 2140 Yong...,多伦多｜我愿称之为加拿大的美食天堂‼️ 记录一次温哥华✈️多伦多的美食之旅，本以为温哥华已经...,🇨🇦多伦多周边｜待腻了就去周边转转吧！🚉 坐GO train到Port Credit或者Cl...,大溫大味Big Way省錢吃法🍲💰🙌 《分享大味省吃法》\n謝謝網友分享大味省錢吃法🤭\n以...,再见温哥华：离开前的美食合集 准备回国了，记录一下今年吃到喜欢的美食\n\t\nJust P...,UBC 的英语课程怎么样 你呀好，希望你呀和你呀的亲人都很好。\n我想问一下 UBC 的英语...,以交换生身份加入俱乐部🥺 你呀好，我将在冬季第二学期去 UBC，我想知道 UBC 的俱乐部是...,🇨🇦温哥华周边｜1001步梯去看海 想念温哥华的晴天☀️，一起和小伙伴去看海的日子，打卡一个...,渥太华大学 图一据说是渥大的网红打卡点，在social science building 里...,这就是大海的召唤吧🥹 在Halifax 出差，打卡一家味道很棒的海鲜餐厅！\n\t\n📍Se...,🆘推荐电影🎬10部高分电影推荐清单↓ 🌞《我们一起摇太阳》\n这部电影以其积极向上的主题和感...,好书推荐 #读书,关于经济学 101 教授的想法：安德鲁·吉布森 (andrew gibson) 或米科·帕卡...
9,国际关系课程的成绩要求？ 有人知道考进 IR 的人的成绩是多少吗？\n\n上面说是 70%，...,CR/NCR 中的问题（课程要求是否指课程先决条件？）CR/NCR 中的问题（课程要求是否指...,🇨🇦打败周黑鸭！多伦多炫一桌卤味的快乐🦆 外面噼里啪啦下着大雨\n多伦多的天黑得越来越早\n...,🇨🇦UofT/多伦多去哪吃饭？！超详细绿色无广 如题，绿色无广，有图有真相\n都是自己付钱一...,taylor演唱会周边 哈喽 家人们🫶🏻 今天是Taylor多伦多周边销售第一天 在冷风里排...,在温哥华！市中心！😭💰1288/3晚！还带双早🍞 😍不论是和家人朋友一起度过愉快的周末，或是...,🇨🇦温哥华美食之旅🦞🍣🍝🥤🍦🥩🍰 温哥华好吃的太多啦，给大家总结一下这次旅行吃到好吃的几家图...,❓以前有人曾为 UBC 税务诊所做过志愿者吗？ 小组面试很快就安排好了，我应该期待什么？面试...,内向女孩寻找来自香港的朋友内向女孩寻找来自香港的朋友 你呀好，有没有像我一样内向的香港女孩？...,🇨🇦温哥华周边｜Bowen Island一日游攻略 距离温哥华仅20分钟船程的小岛，非常适合...,确实出片！渥太华🇨🇦特种兵2天1夜之day1 交通：\n抵达渥太华机场后，我们搭了97号🚌到...,🇨🇦Halifax｜来哈法当然要吃Lobster Roll啦 #今天晚餐长这样.\n来哈法当...,新上映俄版霸总文❗️富家公子✖️夜店打工女孩 🎬《阿诺拉》\n阿诺拉是布鲁克林的tuo衣💃女...,深夜读书，这本书我想全文背诵\n\t\n“Everything you say to a ...,《洛基恐怖片》放映（电影之夜）与 WaterBoo 嗨，r/uwaterloo 的《洛基恐怖...


## save

In [14]:

daily_df.to_excel('daily_df.xlsx', index=False)