Skip to content

Commit 73b035b

Browse files
committed
CLI admin script to import posts from instagram JSON export
A simple CLI script, invoked as `bin/tootctl ig_import import ig-json/content/posts_1.json mastodon-account`. The script supports importing media and splits long IG posts into shorter chunks.
1 parent 633d175 commit 73b035b

2 files changed

Lines changed: 106 additions & 0 deletions

File tree

lib/cli.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
require_relative 'mastodon/email_domain_blocks_cli'
1616
require_relative 'mastodon/ip_blocks_cli'
1717
require_relative 'mastodon/maintenance_cli'
18+
require_relative 'mastodon/ig_import_cli'
1819
require_relative 'mastodon/version'
1920

2021
module Mastodon
@@ -65,6 +66,9 @@ def self.exit_on_failure?
6566
desc 'maintenance SUBCOMMAND ...ARGS', 'Various maintenance utilities'
6667
subcommand 'maintenance', Mastodon::MaintenanceCLI
6768

69+
# Register the Instagram import commands. The name in the usage banner has to
# match the registered subcommand name, otherwise the help output advertises a
# command that does not exist.
desc 'ig_import SUBCOMMAND ...ARGS', 'Import instagram posts as mastodon statuses'
subcommand 'ig_import', Mastodon::IGImportCLI
71+
6872
option :dry_run, type: :boolean
6973
desc 'self-destruct', 'Erase the server from the federation'
7074
long_desc <<~LONG_DESC

lib/mastodon/ig_import_cli.rb

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# frozen_string_literal: true
2+
3+
require_relative '../../config/boot'
4+
require_relative '../../config/environment'
5+
require_relative 'cli_helper'
6+
7+
require 'date'
8+
require 'json'
9+
10+
module Mastodon
11+
class IGImportCLI < Thor
12+
include CLIHelper
13+
14+
@@logger = Logger.new($stdout)
15+
16+
desc 'import posts_1.json account_name', 'Import posts from IG json into account_name.'
17+
# Imports every post of an Instagram JSON export into a local account.
#
# Posts are handled oldest first, ordered by the creation timestamp of their
# first media entry, and each one is passed to handle_post.
#
# @param json_fp [String] path to the export's posts file (e.g. content/posts_1.json)
# @param account_name [String] name of the local account that receives the statuses
# @raise [Thor::Error] when Account.find_local returns nil for account_name
def import(json_fp, account_name)
  # Media URIs are later joined onto the parent of the directory holding the JSON file.
  @root_path = File.join(File.dirname(json_fp), '..')
  @account = Account.find_local(account_name)
  # Fail early with a readable message instead of a NoMethodError on nil once
  # the first post is being processed.
  raise Thor::Error, "No local account named #{account_name}" if @account.nil?

  posts = JSON.parse(File.read(json_fp))
  posts
    .sort_by { |item| item['media'][0]['creation_timestamp'] }
    .each { |post| handle_post(post) }
end
26+
27+
no_commands do
28+
# Turns one entry of the Instagram export into a status, or into a thread of
# statuses when the caption is longer than 500 characters.
#
# The caption is the post's 'title', falling back to the first media entry's
# 'title' when the post has none. All media of the post is attached to the
# first status only; every further chunk is created as a reply to the previous
# one. Nothing is created when the account already has a status with the same
# text as the first chunk.
#
# @param post [Hash] one parsed post with a 'media' array and an optional 'title'
# @raise [RuntimeError] when the caption would need 100 or more chunks
def handle_post(post)
  first_media = post['media'][0]
  caption = post['title'].to_s
  caption = first_media['title'].to_s if caption.empty?
  text = repair_encoding(caption)
  text_chunks = chunk_text(text)

  # Has a status with this text already been created? (A user who really has
  # two posts with the exact same caption only gets the first one imported.)
  return if post_exists?(text_chunks[0])

  created_at = Time.at(first_media['creation_timestamp'].to_i).utc
  language = @account.user&.setting_default_language&.presence || LanguageDetector.instance.detect(text, @account)

  # One transaction per post: either the whole thread is created or nothing.
  ApplicationRecord.transaction do
    previous = nil
    text_chunks.each_with_index do |chunk, index|
      previous = @account.statuses.create!(
        text: chunk,
        # add one second to each subsequent chunk so they show up chronologically in the feed
        created_at: created_at + index,
        # media is posted on the first chunk only
        media_attachments: index.zero? ? post['media'].map { |item| create_media(item) } : [],
        # a new chunk is always a reply to the previous chunk
        thread: previous,
        sensitive: false,
        spoiler_text: '',
        visibility: 'public',
        language: language,
        rate_limit: false
      )
      if index.zero?
        @@logger.info "Created status with ID #{previous.id}"
      else
        @@logger.info "Created status with ID #{previous.id} (reply) "
      end
    end
  end
end

# Re-interprets the caption's characters as raw bytes and reads those bytes as
# UTF-8 (presumably the export stores every UTF-8 byte as its own Latin-1 code
# point; confirm against a real export). Falls back to the untouched text when
# it contains characters outside ISO-8859-1 or when the result is not valid
# UTF-8, i.e. when the caption was not garbled in the first place.
def repair_encoding(text)
  repaired = text.encode('ISO-8859-1').force_encoding('utf-8')
  repaired.valid_encoding? ? repaired : text
rescue EncodingError
  text
end

# Returns the texts of the statuses needed for +text+: the text itself when it
# fits into 500 characters, otherwise numbered pieces ("... (2/5)").
def chunk_text(text)
  max_chars = 500
  return [text] if text.size <= max_chars

  # At most 99 chunks are allowed, so " (99/99)" is the longest possible suffix.
  pieces = split_on_whitespace(text, max_chars - ' (99/99)'.size)
  n_chunks = pieces.size
  raise "Text too long: #{text.size} chars would become #{n_chunks} chunks" unless n_chunks < 100

  @@logger.warn "Text size #{text.size} longer than 500, splitting into #{n_chunks} chunks"
  pieces.map.with_index(1) { |piece, number| "#{piece} (#{number}/#{n_chunks})" }
end

# Splits +text+ into pieces of at most +limit+ characters without losing any
# non-whitespace content. Pieces end at the last whitespace that fits; a run
# without whitespace that is longer than +limit+ is cut hard.
def split_on_whitespace(text, limit)
  pieces = []
  rest = text.strip
  until rest.empty?
    if rest.size <= limit
      pieces << rest
      break
    end

    # Look one character past the limit so whitespace right after a full
    # piece still counts as a break point.
    cut = rest[0, limit + 1].rindex(/\s/)
    cut = limit if cut.nil? || cut.zero?
    pieces << rest[0, cut].rstrip
    rest = rest[cut..-1].lstrip
  end
  pieces
end
84+
85+
# Tells whether the target account already has a status whose text is exactly
# +post_text+. Uses an existence check so no status record has to be loaded.
#
# @param post_text [String] text of the status to look for
# @return [Boolean]
def post_exists?(post_text)
  @account.statuses.exists?(text: post_text)
end
88+
89+
# Creates a media attachment on the target account from one media entry of the
# export. Only the file is set; thumbnail, description and focus are left unset.
#
# @param media_item [Hash] media entry whose 'uri' is relative to the export root
# @param mime_type [String, nil] content type to declare; when omitted it is
#   chosen from the file extension, defaulting to 'image/jpeg' for unknown ones
# @return the record returned by media_attachments.create!
def create_media(media_item, mime_type = nil)
  path = File.join(@root_path, media_item['uri'])
  # An export can contain more than JPEGs (videos in particular), so do not
  # label everything as image/jpeg.
  mime_type ||= {
    '.jpg' => 'image/jpeg',
    '.jpeg' => 'image/jpeg',
    '.png' => 'image/png',
    '.gif' => 'image/gif',
    '.webp' => 'image/webp',
    '.heic' => 'image/heic',
    '.mp4' => 'video/mp4',
    '.mov' => 'video/quicktime',
    '.webm' => 'video/webm',
  }.fetch(File.extname(path).downcase, 'image/jpeg')

  upload = Rack::Test::UploadedFile.new(path, mime_type)
  @account.media_attachments.create!(file: upload)
end
100+
end
101+
end
102+
end

0 commit comments

Comments
 (0)