Skip to content

Commit

Permalink
[compiler] implement composer classmap autoloading
Browse files Browse the repository at this point in the history
Fixes #49
  • Loading branch information
quasilyte committed Jul 12, 2022
1 parent 8438196 commit 421b14e
Show file tree
Hide file tree
Showing 15 changed files with 465 additions and 240 deletions.
96 changes: 1 addition & 95 deletions compiler/compiler-core.cpp
Expand Up @@ -4,11 +4,8 @@

#include "compiler/compiler-core.h"

#include <dirent.h>

#include "common/algorithms/contains.h"
#include "common/wrappers/mkdir_recursive.h"
#include "common/smart_ptrs/unique_ptr_with_delete_function.h"

#include "compiler/const-manipulations.h"
#include "compiler/data/ffi-data.h"
Expand All @@ -18,80 +15,6 @@
#include "compiler/data/src-file.h"
#include "compiler/name-gen.h"

namespace {

// Releases a directory stream obtained from opendir().
// Used as the delete function of the DIR smart pointer in collect_composer_folders();
// the status returned by closedir() is deliberately discarded.
void close_dir(DIR *d) {
  static_cast<void>(closedir(d));
}

// Recursively collects composer package roots located under path into result.
//
// A directory that directly contains composer.json is appended to result (as-is)
// and is not descended into; any other directory is searched recursively.
// Entries whose names start with '.' are skipped.
// If path can't be opened, a warning is emitted and result is left untouched.
void collect_composer_folders(const std::string &path, std::vector<std::string> &result) {
  // can't use nftw here as it doesn't provide a portable way to stop directory traversal;
  // we don't want to visit *all* files in the vendor tree
  //
  // suppose we have this composer-generated layout:
  //     vendor/pkg1/
  //     * composer.json
  //     * src/
  //     vendor/ns/pkg2/
  //     * composer.json
  //     * src/
  // all pkg directories can have a lot of files inside src/,
  // if we can stop as soon as we find composer.json, a lot of
  // redundant work is avoided

  // since composer package can't have nested composer.json file, we stop
  // directory traversal if we found it; otherwise we descend further;
  // dirs contains all directories that we need to visit when descending
  std::vector<std::string> dirs;

  {
    // the directory stream lives only inside this scope, so it is closed
    // before we recurse (open descriptors don't pile up with the tree depth)
    vk::unique_ptr_with_delete_function<DIR, close_dir> dp{opendir(path.c_str())};
    if (dp == nullptr) {
      kphp_warning(fmt_format("find composer files: opendir({}) failed: {}", path.c_str(), strerror(errno)));
      return;
    }

    while (const auto *entry = readdir(dp.get())) {
      if (entry->d_name[0] == '.') {
        continue;
      }

      if (std::strcmp(entry->d_name, "composer.json") == 0) {
        result.push_back(path);
        return;
      }

      const auto entry_type = entry->d_type;
      if (entry_type != DT_DIR && entry_type != DT_LNK && entry_type != DT_UNKNOWN) {
        continue;
      }

      auto entry_path = path + "/" + entry->d_name;
      if (entry_type != DT_DIR) {
        // by default, composer does no copy for packages; it creates a symlink instead;
        // readdir may also report DT_UNKNOWN when the filesystem doesn't provide the entry type;
        // in both cases ask stat, and collect only those entries that resolve to a directory
        // (a failed stat, e.g. for a dangling link, leaves entry_info unusable, so skip the entry)
        struct stat entry_info {};
        if (stat(entry_path.c_str(), &entry_info) != 0 || !S_ISDIR(entry_info.st_mode)) {
          continue;
        }
      }
      dirs.push_back(std::move(entry_path));
    }
  }

  for (const auto &dir : dirs) {
    collect_composer_folders(dir, result);
  }
}

// Returns the root directory of every composer package found under dir.
// A thin value-returning wrapper around collect_composer_folders(),
// which does the actual traversal and fills the list.
std::vector<std::string> find_composer_folders(const std::string &dir) {
  std::vector<std::string> package_roots;
  collect_composer_folders(dir, package_roots);
  return package_roots;
}

}

static FunctionPtr UNPARSED_BUT_REQUIRED_FUNC_PTR = FunctionPtr(reinterpret_cast<FunctionData *>(0x0001));

CompilerCore::CompilerCore() :
Expand Down Expand Up @@ -532,24 +455,7 @@ void CompilerCore::init_composer_class_loader() {
}

composer_class_loader.set_use_dev(settings().composer_autoload_dev.get());

composer_class_loader.load_root_file(settings().composer_root.get());

// We could traverse the composer file and collect all "repositories"
// and map them with "requirements" to get the dependency list,
// but some projects may use composer plugins that change composer
// files before "composer install" is invoked, so the final vendor
// folder may be generated from files that differ from the composer
// files that we can reach. To avoid that problem, we scan the vendor
// folder in order to collect all dependencies (both direct and indirect).

std::string vendor = settings().composer_root.get() + "vendor";
bool vendor_folder_exists = access(vendor.c_str(), F_OK) == 0;
if (vendor_folder_exists) {
for (const auto &composer_root : find_composer_folders(vendor)) {
composer_class_loader.load_file(composer_root);
}
}
composer_class_loader.load(settings().composer_root.get());
}


Expand Down
160 changes: 154 additions & 6 deletions compiler/composer.cpp
Expand Up @@ -6,11 +6,128 @@

#include <yaml-cpp/yaml.h> // using YAML parser to handle JSON files

#include <dirent.h>
#include <sys/stat.h> // TODO: remove when std::filesystem is used everywhere instead of stat
#include <filesystem>

#include "common/smart_ptrs/unique_ptr_with_delete_function.h"
#include "common/algorithms/contains.h"
#include "common/wrappers/fmt_format.h"
#include "compiler/kphp_assert.h"
#include "compiler/stage.h"

namespace {

// Releases a directory stream obtained from opendir().
// Used as the delete function of the DIR smart pointer in collect_composer_folders();
// the status returned by closedir() is deliberately discarded.
void close_dir(DIR *d) {
  static_cast<void>(closedir(d));
}

// Recursively collects composer package roots located under path into result.
//
// A directory that directly contains composer.json is appended to result (as-is)
// and is not descended into; any other directory is searched recursively.
// Nested directories are passed down with a trailing '/'.
// Entries whose names start with '.' are skipped.
// If path can't be opened, a warning is emitted and result is left untouched.
void collect_composer_folders(const std::string &path, std::vector<std::string> &result) {
  // can't use nftw here as it doesn't provide a portable way to stop directory traversal;
  // we don't want to visit *all* files in the vendor tree
  //
  // suppose we have this composer-generated layout:
  //     vendor/pkg1/
  //     * composer.json
  //     * src/
  //     vendor/ns/pkg2/
  //     * composer.json
  //     * src/
  // all pkg directories can have a lot of files inside src/,
  // if we can stop as soon as we find composer.json, a lot of
  // redundant work is avoided
  //
  // TODO: rewrite using C++17 filesystem?

  // since composer package can't have nested composer.json file, we stop
  // directory traversal if we found it; otherwise we descend further;
  // dirs contains all directories that we need to visit when descending
  std::vector<std::string> dirs;

  {
    // the directory stream lives only inside this scope, so it is closed
    // before we recurse (open descriptors don't pile up with the tree depth)
    vk::unique_ptr_with_delete_function<DIR, close_dir> dp{opendir(path.c_str())};
    if (dp == nullptr) {
      kphp_warning(fmt_format("find composer files: opendir({}) failed: {}", path.c_str(), strerror(errno)));
      return;
    }

    // nested paths already end with '/', so add a separator only when it's missing;
    // this keeps "//" out of the collected package roots
    const std::string base = path.back() == '/' ? path : path + "/";

    while (const auto *entry = readdir(dp.get())) {
      if (entry->d_name[0] == '.') {
        continue;
      }

      if (std::strcmp(entry->d_name, "composer.json") == 0) {
        result.push_back(path);
        return;
      }

      const auto entry_type = entry->d_type;
      if (entry_type != DT_DIR && entry_type != DT_LNK && entry_type != DT_UNKNOWN) {
        continue;
      }

      auto entry_path = base + entry->d_name + "/";
      if (entry_type != DT_DIR) {
        // by default, composer does no copy for packages; it creates a symlink instead;
        // readdir may also report DT_UNKNOWN when the filesystem doesn't provide the entry type;
        // in both cases ask stat, and collect only those entries that resolve to a directory
        // (a failed stat, e.g. for a dangling link, leaves entry_info unusable, so skip the entry)
        struct stat entry_info {};
        if (stat(entry_path.c_str(), &entry_info) != 0 || !S_ISDIR(entry_info.st_mode)) {
          continue;
        }
      }
      dirs.push_back(std::move(entry_path));
    }
  }

  for (const auto &dir : dirs) {
    collect_composer_folders(dir, result);
  }
}

// Returns the root directory of every composer package found under dir.
// A thin value-returning wrapper around collect_composer_folders(),
// which does the actual traversal and fills the list.
std::vector<std::string> find_composer_folders(const std::string &dir) {
  std::vector<std::string> package_roots;
  collect_composer_folders(dir, package_roots);
  return package_roots;
}

} // namespace

// Reports whether filename was registered as a composer classmap file,
// i.e. whether it is present in classmap_files_ (filled by scan_classmap()).
// The lookup is an exact string match.
bool ComposerAutoloader::is_classmap_file(const std::string &filename) const noexcept {
  return classmap_files_.count(filename) != 0;
}

void ComposerAutoloader::scan_classmap(const std::string &filename) {
// supporting the real composer classmap is cumbersome: it requires full PHP parsing to
// fetch all classes from files (the filename doesn't have to follow any conventions);
// we could also invoke php interpreter over vendor/composer/autoload_classmap.php to
// print a JSON dump of the generated classmap and then decode that, but then
// it will be impossible to compile a kphp program that uses a classmap without php interpreter;
// as an alternative, we add all classmap files to auto-required lists that will be
// included along "autoload.files" files, if some classes are not needed, they will be
// discarded after we compute actually used symbols
//
// this approach works well as long as there is no significant side effects related to
// the files being autoloaded (otherwise those side effects will trigger at different point in time)

const auto add_classmap_file = [&](const std::string &filename) {
classmap_files_.insert(filename);
files_to_require_.emplace_back(filename);
};

auto file_info = std::filesystem::status(filename);
kphp_error(file_info.type() != std::filesystem::file_type::not_found,
fmt_format("can't find {} classmap file", filename));
if (file_info.type() == std::filesystem::file_type::directory) {
for (const auto &entry : std::filesystem::directory_iterator(filename)) {
scan_classmap(entry.path().string());
}
} else if (file_info.type() == std::filesystem::file_type::regular) {
if (vk::string_view(filename).ends_with(".php") || vk::string_view(filename).ends_with(".inc")) {
add_classmap_file(filename);
}
}
}

std::string ComposerAutoloader::psr4_lookup_nocache(const std::string &class_name) const {
std::string prefix = class_name;

Expand Down Expand Up @@ -84,6 +201,26 @@ void ComposerAutoloader::set_use_dev(bool v) {
use_dev_ = v;
}

// Loads the root composer file from pkg_root and then the composer file of
// every package found under "<pkg_root>vendor" (when that path exists).
// pkg_root is concatenated with "vendor" directly, so it is expected to end with '/'.
void ComposerAutoloader::load(const std::string &pkg_root) {
  load_root_file(pkg_root);

  // Why the vendor folder is scanned instead of resolving dependencies
  // from the composer files ("repositories" mapped to "requirements"):
  // some projects use composer plugins that rewrite composer files
  // before "composer install" runs, so the final vendor folder may be
  // generated from files that differ from the ones we can reach.
  // Scanning the vendor folder collects all dependencies
  // (both direct and indirect) regardless of that.

  const std::string vendor_dir = pkg_root + "vendor";
  if (access(vendor_dir.c_str(), F_OK) != 0) {
    return; // no vendor folder, nothing else to load
  }

  for (const auto &package_root : find_composer_folders(vendor_dir)) {
    load_file(package_root);
  }
}

void ComposerAutoloader::load_root_file(const std::string &pkg_root) {
kphp_assert(!pkg_root.empty() && pkg_root.back() == '/');
kphp_assert(autoload_filename_.empty());
Expand Down Expand Up @@ -119,6 +256,11 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
// "": "fallback-dir/",
// <...>
// },
// "classmap": [
// "src/",
// "lib/file.php",
// <...>
// ],
// "files": [
// "file.php",
// <...>
Expand All @@ -136,9 +278,9 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
// }
// }

auto filename = pkg_root + "/composer.json";
auto filename = pkg_root + "composer.json";

auto add_autoload_dir = [&](const std::string &prefix, std::string dir) {
auto add_autoload_psr4_dir = [&](const std::string &prefix, std::string dir) {
if (dir.empty()) {
dir = "./"; // composer interprets "" as "./" or "."
}
Expand All @@ -148,7 +290,7 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil
dir.push_back('/');
}

autoload_psr4_[prefix].emplace_back(pkg_root + "/" + dir);
autoload_psr4_[prefix].emplace_back(pkg_root + dir);
};

auto add_autoload_section = [&](YAML::Node autoload, bool require_files) {
Expand All @@ -160,20 +302,26 @@ void ComposerAutoloader::load_file(const std::string &pkg_root, bool is_root_fil

if (kv.second.IsSequence()) {
for (const auto &dir : kv.second) {
add_autoload_dir(prefix, dir.as<std::string>());
add_autoload_psr4_dir(prefix, dir.as<std::string>());
}
} else if (kv.second.IsScalar()) {
add_autoload_dir(prefix, kv.second.as<std::string>());
add_autoload_psr4_dir(prefix, kv.second.as<std::string>());
} else {
kphp_error(false, fmt_format("load composer file {}: invalid autoload psr-4 item", filename.c_str()));
}
}

// https://getcomposer.org/doc/04-schema.md#classmap
const auto classmap_src = autoload["classmap"];
for (const auto &elem : classmap_src) {
scan_classmap(pkg_root + elem.as<std::string>());
}

if (require_files) {
// files that are required by the composer-generated autoload.php
// https://getcomposer.org/doc/04-schema.md#files
for (const auto &autoload_filename : autoload["files"]) {
files_to_require_.emplace_back(pkg_root + "/" + autoload_filename.as<std::string>());
files_to_require_.emplace_back(pkg_root + autoload_filename.as<std::string>());
}
}
};
Expand Down
24 changes: 15 additions & 9 deletions compiler/composer.h
Expand Up @@ -22,15 +22,7 @@ class ComposerAutoloader : private vk::not_copyable {
// inverse of the `composer --no-dev` option
void set_use_dev(bool v);

// load_file parses the "$pkg_root/composer.json" file and saves
// all relevant definitions inside loader;
// this method is not thread-safe and should be called only during the compiler init
void load_file(const std::string &pkg_root);

// load_root_file is like load_file, but should be only used
// for the main composer file;
// this method is not thread-safe and should be called only during the compiler init
void load_root_file(const std::string &pkg_root);
void load(const std::string&);

// psr4_lookup tries to find a filename that should contain
// a class class_name; the lookup is based on the loaded
Expand All @@ -43,17 +35,31 @@ class ComposerAutoloader : private vk::not_copyable {
return filename == autoload_filename_;
}

bool is_classmap_file(const std::string &filename) const noexcept;

const std::vector<std::string> &get_files_to_require() const noexcept {
return files_to_require_;
}

private:
// load_file parses the "$pkg_root/composer.json" file and saves
// all relevant definitions inside loader;
// this method is not thread-safe and should be called only during the compiler init
void load_file(const std::string &pkg_root);

// load_root_file is like load_file, but should be only used
// for the main composer file;
// this method is not thread-safe and should be called only during the compiler init
void load_root_file(const std::string &pkg_root);

void load_file(const std::string &filename, bool root);
std::string psr4_lookup_nocache(const std::string &class_name) const;
void scan_classmap(const std::string &filename);

bool use_dev_;
std::map<std::string, std::vector<std::string>, std::less<>> autoload_psr4_;
std::unordered_set<std::string> deps_;
std::unordered_set<std::string> classmap_files_;

std::string autoload_filename_;
std::vector<std::string> files_to_require_;
Expand Down
3 changes: 2 additions & 1 deletion compiler/data/class-data.cpp
Expand Up @@ -41,7 +41,8 @@ void ClassData::set_name_and_src_name(const std::string &full_name) {
std::string namespace_name = pos == std::string::npos ? "" : full_name.substr(0, pos);
std::string class_name = pos == std::string::npos ? full_name : full_name.substr(pos + 1);

this->can_be_php_autoloaded = file_id && namespace_name == file_id->namespace_name && class_name == file_id->short_file_name;
this->can_be_php_autoloaded = file_id && ((namespace_name == file_id->namespace_name && class_name == file_id->short_file_name) ||
(G->get_composer_autoloader().is_classmap_file(file_id->file_name)));
this->can_be_php_autoloaded |= this->is_builtin();

this->is_lambda = vk::string_view{full_name}.starts_with("Lambda$") || vk::string_view{full_name}.starts_with("ITyped$");
Expand Down

0 comments on commit 421b14e

Please sign in to comment.