In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from datasets import load_dataset, Dataset, concatenate_datasets
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

import requests
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

In [2]:
# Checkpoint shared by the tokenizer, the model and the processor below.
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Plain tokenizer, used by the cells that build the chat prompt by hand.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are loaded in bfloat16 and placed automatically (device_map="auto").
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# The chat-template cells further down call `processor.apply_chat_template`,
# `processor(...)` and `processor.decode`, so the processor has to be created
# here; leaving it commented out makes those cells fail with NameError after a
# kernel restart.
processor = AutoProcessor.from_pretrained(model_name)

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [28]:
# Rebuilds the tokenizer from `model_name`; this is the same call the
# model-loading cell already makes, so it only matters if `tokenizer` was
# overwritten in between.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
import pandas as pd
import os

# Folder that holds the per-user CSV files.
user_folder = "users_cleaned"

# Only CSV files take part in the numbering, and they are sorted first:
# os.listdir() returns entries in arbitrary order, and counting non-CSV entries
# would leave gaps in the keys. The result is a stable, gap-free 1..N mapping.
csv_filenames = sorted(
    name for name in os.listdir(user_folder) if name.endswith(".csv")
)

# Maps a 1-based user number to the DataFrame read from that user's CSV file.
dfs = {}
for user_number, filename in enumerate(csv_filenames, start=1):
    filepath = os.path.join(user_folder, filename)
    dfs[user_number] = pd.read_csv(filepath)
    # Echo the number/filename pair so a key can be traced back to its file.
    print(user_number)
    print(filename)

# Plain check after the loop (the original used a for/else, which runs on every
# normal loop exit and only worked because of the emptiness test inside it).
if not dfs:
    print("未在user文件夹中找到csv文件。")

In [4]:
# Keep only the rows whose 'coordinate' value is not the literal string '0,0'.
for key, user_df in dfs.items():
    has_coordinate = user_df['coordinate'] != '0,0'
    dfs[key] = user_df[has_coordinate]

In [5]:
# Drop every post whose text contains '转发'.
for i in dfs:
    # na=False: a missing 'content' value counts as "does not contain", so the
    # mask stays purely boolean and `~` cannot fail on NaN entries.
    # regex=False: '转发' is matched as a literal substring.
    is_repost = dfs[i]['content'].str.contains('转发', na=False, regex=False)
    dfs[i] = dfs[i][~is_repost]

In [132]:
# Load the instruction text used as the prompt. Each prompt-loading cell
# overwrites the same `prompt` variable, so the one executed last wins.
# The encoding is explicit so the text decodes the same way on every platform
# (the notebook writes its own output files as UTF-8 as well).
with open('prompt.txt', 'r', encoding='utf-8') as prompt_file:
    prompt = prompt_file.read()

In [16]:
# Load the instruction text from prompt_poi.txt. Each prompt-loading cell
# overwrites the same `prompt` variable, so the one executed last wins.
# The encoding is explicit so the text decodes the same way on every platform
# (the notebook writes its own output files as UTF-8 as well).
with open('prompt_poi.txt', 'r', encoding='utf-8') as prompt_file:
    prompt = prompt_file.read()

In [122]:
# Load the instruction text from prompt_img.txt. Each prompt-loading cell
# overwrites the same `prompt` variable, so the one executed last wins.
# The encoding is explicit so the text decodes the same way on every platform
# (the notebook writes its own output files as UTF-8 as well).
with open('prompt_img.txt', 'r', encoding='utf-8') as prompt_file:
    prompt = prompt_file.read()

In [7]:
def generate_prompt(content):
    """Wrap ``content`` as the user turn of a hand-assembled chat prompt.

    The prompt consists of four pieces joined by single spaces: the
    begin-of-text marker with the system header, a fixed system instruction
    followed by the user header, ``content`` itself, and the assistant header
    that cues the model to answer.

    Args:
        content: Text of the user message (must be a ``str``).

    Returns:
        The complete prompt string, ending with the assistant header.
    """
    system_header = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
    instruction_and_user_header = (
        "You are a helpful AI assistant, answer the question as short as possible.\n"
        "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
    )
    assistant_header = "\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"

    pieces = (system_header, instruction_and_user_header, content, assistant_header)
    return " ".join(pieces)

print(generate_prompt('Hello'))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 You are a helpful AI assistant, answer the question as short as possible.
<|eot_id|><|start_header_id|>user<|end_header_id|>
 Hello 
<|eot_id|><|start_header_id|>assistant<|end_header_id|>



In [8]:
# Render the first 30 posts of user 10 as "<time>, <location> (<coordinate>), <content>"
# lines and preview the resulting chat prompt.
post_lines = []
for _, row in dfs[10].iloc[0:30].iterrows():
    post_lines.append(
        f"{row['publish_time']}, {row['location']} ({row['coordinate']}), {row['content']}\n"
    )
    # After the loop this holds the 'img_urls' value of the last rendered row.
    img_url = row['img_urls']
output_string = "".join(post_lines)

# The instruction text comes first, then the rendered posts under "Contents:".
new_text = f"{prompt}\n\nContents: \n{output_string}"
print(generate_prompt(new_text))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 You are a helpful AI assistant, answer the question as short as possible.
<|eot_id|><|start_header_id|>user<|end_header_id|>
 The following is a record of posts made by users on social media, including the date, time, geographic location, and latitude and longitude coordinates of each post.
Your task is to find the user's home, workplace or school, and work duration.
You should judge the type of the user's current location based on time and the content of the post, and determine the user's usual work duration based on the time of location switching. If you don't have enough information, don't jump to conclusions.
Your answer should include detailed location and precise coordinates and time, for example:

Home: (位置) (longitude1, latitude1).
Works/studies at  (位置) (longitude2, latitude2).
Work/study duration is from -- am to -- pm.

Tip: The location that often appears at midnight is likely to be home. Some users are freelancer

In [None]:
# Wrap the assembled record in the hand-built chat prompt.
input_prompt = generate_prompt(new_text)

# add_special_tokens=False: generate_prompt() already starts with
# <|begin_of_text|>, so letting the tokenizer prepend its own BOS token would
# put two of them at the start of the sequence.
# .to(model.device): the model was placed with device_map="auto", so follow the
# model instead of hard-coding "cuda" (same convention as the processor cells).
inputs = tokenizer(
    input_prompt,
    return_tensors="pt",
    truncation=True,
    add_special_tokens=False,
).to(model.device)
input_tokens = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# torch.autocast(device_type="cuda") is the non-deprecated spelling of
# torch.cuda.amp.autocast().
with torch.autocast(device_type="cuda"):
    generation_output = model.generate(
        input_ids=input_tokens,
        attention_mask=attention_mask,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding
        repetition_penalty=1.1,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

# The decoded text covers the whole sequence (prompt followed by the answer).
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

In [17]:
import pandas as pd
import time

# Directory that receives one text file per user.
output_dir = "output"
# open(..., "w") does not create missing parent directories, so make sure the
# target directory exists before the loop starts writing.
os.makedirs(output_dir, exist_ok=True)

# Iterate over the keys that are actually present in `dfs`. The previous
# range(1, len(dfs)) skipped the last user and raised KeyError whenever the
# keys were not a contiguous 1..N sequence.
for i in sorted(dfs):
    # Render the user's first 30 posts, one line per post.
    post_lines = []
    for index, row in dfs[i].iloc[0:30].iterrows():
        post_lines.append(
            f"{row['publish_time']}, {row['location']} ({row['coordinate']}), {row['content']}\n"
        )
    output_string = "".join(post_lines)

    # Instruction text first, then the rendered posts under "Contents:".
    new_text = prompt + '\n\nContents: \n' + output_string
    input_prompt = generate_prompt(new_text)

    # add_special_tokens=False: generate_prompt() already starts with
    # <|begin_of_text|>, so the tokenizer must not prepend a second BOS token.
    # .to(model.device): follow the device chosen by device_map="auto".
    inputs = tokenizer(
        input_prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    input_tokens = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Non-deprecated spelling of torch.cuda.amp.autocast().
    with torch.autocast(device_type="cuda"):
        generation_output = model.generate(
            input_ids=input_tokens,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=False,  # greedy decoding
            repetition_penalty=1.1,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The decoded text covers the whole sequence (prompt followed by the answer).
    op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
    print(op)

    # Mode "w" creates the file when it is missing and replaces it otherwise,
    # so no separate "create empty file" step is needed.
    output_filename = os.path.join(output_dir, f"output_{i}.txt")
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(op)

system
 You are a helpful AI assistant, answer the question as short as possible.
user
 The following is a record of posts made by users on social media, including the date, time, geographic location, and latitude and longitude coordinates of each post.
Your task is to find the user's home, workplace or school, and work duration.
You should judge the type of the user's current location based on time and the content of the post, and determine the user's usual work duration based on the time of location switching. If you don't have enough information, don't jump to conclusions.
Your answer should include detailed location and precise coordinates and time, for example:

The user likely lives in 位置, (longitude1, latitude1).
The user likely works/studies at 位置, (longitude2, latitude2).
The user's work/study duration is from -- am to -- pm. 

Tip: The location that often appears at midnight is likely to be home. Some users are freelancers and their home is their workplace. Number of workplac

In [64]:
# Directory that receives the output text file.
output_dir = "output"
# open(..., "w") does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)

# NOTE: `i` and `op` are not defined in this cell; they are whatever the most
# recently executed generation cell left in the kernel.
output_filename = os.path.join(output_dir, f"output_{i}.txt")
# Mode "w" creates the file when it is missing and replaces it otherwise, so no
# separate "create empty file" step is needed.
with open(output_filename, "w", encoding="utf-8") as f:
    f.write(op)

In [25]:
# Text-only chat request: a short system instruction plus `new_text` as the user turn.
system_turn = {"role": "system", "content": 'Answer question as short as possible.'}
user_turn = {"role": "user", "content": [{"type": "text", "text": new_text}]}
messages = [system_turn, user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(text=input_text, add_special_tokens=False, return_tensors="pt")
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=256)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 29 Oct 2024

Answer question as short as possible.<|eot_id|><|start_header_id|>user<|end_header_id|>

The following is a record of posts published by users on social media, including the date, time, geographic location, and latitude and longitude coordinates of each post. Your task is to find the user's points of interest, including but not limited to the location of the user's home, workplace, school, places they often go to, and the coordinate types of these places. The location should be as detailed as possible, the coordinates must be given, and the province and city should be included as much as possible. You should combine the content of the post to determine the type of the user's current location. If you do not have enough information, do not easily draw conclusions.

Contents: 
2015-03-27 13:51:00, 无 (0,0), 落日余晖，美好整个城市。  我正在使用"城市余晖"封面图，好漂亮，你们都快来试试！网页链接  原图 
2015-04-03

In [84]:
import requests

def download_image(image_url, save_path, timeout=30):
    """Download ``image_url`` and write the response body to ``save_path``.

    Args:
        image_url: URL fetched with an HTTP GET request.
        save_path: Local path the bytes are written to (opened in binary mode).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an unresponsive
            host.

    Returns:
        None. A message describing the outcome is printed instead.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...). A non-200 status code does not
            raise; it is only reported.
    """
    response = requests.get(image_url, timeout=timeout)

    # Only a 200 response is written to disk.
    if response.status_code == 200:
        with open(save_path, 'wb') as file:
            file.write(response.content)
        print(f"Image saved to {save_path}")
    else:
        print(f"Failed to download image. Status code: {response.status_code}")

# Example usage
#image_url = 'https://ww1.sinaimg.cn/large/486c37b1gw6dbeejrajl1j.jpg'
save_path = 'downloaded_image.jpg'

Image saved to downloaded_image.jpg


In [125]:
# Image + text chat request: the picture stored at `save_path` is sent together with `new_text`.
image = Image.open(save_path)

user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": new_text},
    ],
}
messages = [user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(
    images=image,
    text=input_text,
    add_special_tokens=False,
    return_tensors="pt",
)
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=256)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

<|image|>The following is a record of a user posting on social media, which contains coordinate information, but the coordinates are likely to be wrong. Analyze the user's real location and what he was doing at the time based on the text content and pictures. The location should be as detailed as possible, and the coordinates must be given, including the province and city as much as possible.

Contents: 
2010-11-15 10:22:00, 无 (0,0), 刚从西藏拍的，发一张试试。  原图 
<|eot_id|><|start_header_id|>assistant<|end_header_id|>

The user is likely in the city of Lhasa, Tibet, China. The image shows a monk standing on a bridge, with a large number of buildings in the background, which is consistent with the architecture of Lhasa. The coordinates provided, (0,0), are likely incorrect and should be ignored.

The user was likely taking a photo of the monk and the surrounding area, possibly as part of a personal project or to share with others on social

In [134]:
# Render the first 10 posts of user 4 as "<time>, <location> (<coordinate>), <content>" lines.
output_string = "".join(
    f"{row['publish_time']}, {row['location']} ({row['coordinate']}), {row['content']}\n"
    for _, row in dfs[4].iloc[0:10].iterrows()
)

# Instruction text first, then the rendered posts under "Contents:".
new_text = f"{prompt}\n\nContents: \n{output_string}"
print(new_text)

Below is a record of a user's social media posts, including the date, time and geolocation of each post. Your task is to reconstruct the user's detailed trajectory throughout the specified period, inferring their likely activities and destinations beyond just the locations where they posted.

Each line of your output should represent a distinct point of interest visited by the user, formatted like this:

YYYY-MM-DD - AM/PM - Point of Interest (Latitude, Longitude), Type (if discernable)

Example Output:

2013-10-29 - AM - 武汉·武汉科技大学 (114.26727, 30.44067), Type: School
2013-11-01 - PM - 民族大道 (114.397436, 30.477216), Type: Trip
2013-11-01 - PM - 武汉 (114.397436, 30.477216) Approximately, Type: Home

Important: Do not simply list the locations of the posts. Utilize the content of the posts, time stamps, and geographical proximity to infer the user's full route and potential stops even if they didn't explicitly post from those locations.

User Profile: 
1. 居住地：
根据用户的帖子内容和标签，我推测用户可能居住在深圳市。他们在

In [135]:
# Text-only chat request with `new_text` as the single user turn.
user_turn = {"role": "user", "content": [{"type": "text", "text": new_text}]}
messages = [user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(text=input_text, add_special_tokens=False, return_tensors="pt")
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=1024)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 16 Oct 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Below is a record of a user's social media posts, including the date, time and geolocation of each post. Your task is to reconstruct the user's detailed trajectory throughout the specified period, inferring their likely activities and destinations beyond just the locations where they posted.

Each line of your output should represent a distinct point of interest visited by the user, formatted like this:

YYYY-MM-DD - AM/PM - Point of Interest (Latitude, Longitude), Type (if discernable)

Example Output:

2013-10-29 - AM - 武汉·武汉科技大学 (114.26727, 30.44067), Type: School
2013-11-01 - PM - 民族大道 (114.397436, 30.477216), Type: Trip
2013-11-01 - PM - 武汉 (114.397436, 30.477216) Approximately, Type: Home

Important: Do not simply list the locations of the posts. Utilize the content of the posts, time stamps, and geographic

In [131]:
# Text-only chat request with `new_text` as the single user turn.
user_turn = {"role": "user", "content": [{"type": "text", "text": new_text}]}
messages = [user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(text=input_text, add_special_tokens=False, return_tensors="pt")
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=1024)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 16 Oct 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Below is a record of a user's social media posts, including the date, time and geolocation of each post. Your task is to reconstruct the user's detailed trajectory throughout the specified period, inferring their likely activities and destinations beyond just the locations where they posted.

Each line of your output should represent a distinct point of interest visited by the user, formatted like this:

YYYY-MM-DD - AM/PM - Point of Interest (Latitude, Longitude), Type (if discernable)

Example Output:

2013-10-29 - AM - 武汉·武汉科技大学 (114.26727, 30.44067), Type: School
2013-11-01 - PM - 民族大道 (114.397436, 30.477216), Type: Trip
2013-11-01 - PM - 武汉 (114.397436, 30.477216) Approximately, Type: Home

Important: Do not simply list the locations of the posts. Utilize the content of the posts, time stamps, and geographic

In [111]:
# Text-only chat request with `new_text` as the single user turn.
user_turn = {"role": "user", "content": [{"type": "text", "text": new_text}]}
messages = [user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(text=input_text, add_special_tokens=False, return_tensors="pt")
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=512)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 16 Oct 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Below is a record of a user's social media posts, including the date, time and geolocation of each post. Your task is to reconstruct the user's detailed trajectory throughout the specified period, inferring their likely activities and destinations beyond just the locations where they posted.

Each line of your output should represent a distinct point of interest visited by the user, formatted like this:

YYYY-MM-DD - AM/PM - Point of Interest (Latitude, Longitude), Type (if discernable)

Example Output:

2013-10-29 - AM - 武汉·武汉科技大学 (114.26727, 30.44067), Type: School
2013-11-01 - PM - 民族大道 (114.397436, 30.477216), Type: Trip
2013-11-01 - PM - 武汉 (114.397436, 30.477216) Approximately, Type: Home

Important: Do not simply list the locations of the posts. Utilize the content of the posts, time stamps, and geographic

In [78]:
# Text-only chat request with `new_text` as the single user turn.
user_turn = {"role": "user", "content": [{"type": "text", "text": new_text}]}
messages = [user_turn]

# Render the conversation with the model's chat template, ending with the
# assistant header so that generation continues as the assistant.
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The rendered template already contains the special tokens, so none are added here.
inputs = processor(text=input_text, add_special_tokens=False, return_tensors="pt")
inputs = inputs.to(model.device)

output = model.generate(**inputs, max_new_tokens=512)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 16 Oct 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

The following is a record of posts published by users on social media, including the date, time, geographic location, and latitude and longitude coordinates of each post. Your task is to find the user's points of interest, including but not limited to the location of the user's home, workplace, school, places they often go to, and the coordinate types of these places. The location should be as detailed as possible, the coordinates must be given, and the province and city should be included as much as possible.

Example Output:
The user likely lives in 广州 (23.10809, 113.26467).
The user likely to be a student at 武汉科技大学 (114.26727, 30.44067).
The user traveled to 九寨沟 (33.33333, 103.83333) to take photos, as mentioned in the post from 2010-11-15.

Contents: 
2011-01-06 14:47:00, 无 (0,0), 九寨回来了。运气依然谈很好。想雪就下了点雪，想晴，就出来

In [51]:
# Generate again from `inputs`, which is not defined in this cell and must
# already exist in the kernel from an earlier processor cell.
output = model.generate(**inputs, max_new_tokens=512)
# Decodes the full sequence (prompt and answer) with special tokens kept.
print(processor.decode(output[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 15 Oct 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Below is a record of a user's social media posts, including the date, time and geolocation of each post. Your task is to reconstruct the user's detailed trajectory throughout the specified period, inferring their likely activities and destinations beyond just the locations where they posted.

Each line of your output should represent a distinct point of interest visited by the user, formatted like this:

YYYY-MM-DD - AM/PM - Point of Interest (Latitude, Longitude), Type (if discernable)

Example Output:

2013-10-29 - AM - 武汉·武汉科技大学 (114.26727, 30.44067), Type: School
2013-11-01 - PM - 民族大道 (114.397436, 30.477216), Type: Trip
2013-11-01 - PM - (114.397436, 30.477216)Approximately, Type: Home

Important: Do not simply list the locations of the posts. Utilize the content of the posts, time stamps, and geographical p